commit 0da6783a455775772d660f9197cfda62dac4e8d0 Author: Stefan Date: Mon Apr 6 18:48:34 2020 +0200 Import from old repository diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..758bb51 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Ignore back-up files. +*~ + +# Ignore compiled Python files. +*.pyc +*.pyo + +# Don't include build related files. +/dependencies/ +/dist/ +/build/ + +# And don't care about the 'egg'. +/plaso.egg-info + +# Test files +.coverage +tests-coverage.txt + +# And don't care about the temporary code review file if it exists. +._code_review_number diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..4736328 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +language: python +python: + - "2.7" +before_install: + - sudo add-apt-repository ppa:kristinn-l/plaso-dev -y + - sudo apt-get update -q + - sudo apt-get install binplist libbde-python libesedb-python libevt-python libevtx-python libewf-python libfwsi-python liblnk-python libmsiecf-python libolecf-python libqcow-python libregf-python libsmdev-python libsmraw-python libvhdi-python libvmdk-python libvshadow-python ipython python-bencode python-construct python-dateutil python-dfvfs python-dpkt python-hachoir-core python-hachoir-metadata python-hachoir-parser python-protobuf python-psutil python-pyparsing python-six python-yaml python-tz pytsk3 + - sudo pip install coveralls + - sudo pip install ipython --upgrade +script: + - ./run_tests.py + - coverage run --source=plaso --omit="*_test*,*__init__*,*test_lib*" ./run_tests.py +after_success: + - coveralls diff --git a/ACKNOWLEDGEMENTS b/ACKNOWLEDGEMENTS new file mode 100644 index 0000000..021a81b --- /dev/null +++ b/ACKNOWLEDGEMENTS @@ -0,0 +1,138 @@ +Acknowledgements: plaso + +Copyright 2012 The Plaso Project Authors. +Please see the AUTHORS file for details on individual authors. + +Plaso is a Python rewrite of the log2timeline Perl version. + +Plaso is developed and maintained by: +* Kristinn Gudjonsson +* Eric Mak +* Joachim Metz + +Plaso depends on various other projects. 
So thanks to the authors +and others involved with these projects: +* Python and modules +* libyaml +* iPython +* PyInstaller +* the SleuthKit +* pytsk +* Hachoir (not included in binary release) + +Thanks to contributors (alphabetically based on last name): +* Brian Baskin + * Parsers + * BEncode + * Java IDX parser +* Johan Berggren + * SQLite plugins + * Zeitgeist activity database +* Petter Bjelland + * Parsers + * Firefox Cache +* Ashley Holtz + * Parsers + * IIS + * Adobe ColdFusion +* Dominique Kilman + * Parsers + * PCAP +* Marc Leavitt + * Parsers + * PL-SQL recall (PLSRecall.dat) +* Preston Miller + * Windows Registry Plugins + * SAM Users + * Shutdown + * USB +* Joaquin Moreno Garijo + * Parsers + * ASL + * BSM + * Cups IPP + * Mac AppFirewall + * Mac KeyChain + * Mac Securityd + * mac_wifi.log + * utmp + * utmpx + * SQLite plugins + * Skype + * Plist plugins + * Airport + * Apple Account + * Install History + * Mac User + * Software Update + * Spotlight + * TimeMachine +* David Nides (@davnads) + * Output modules + * 4n6time SQLite, with thanks to Eric Wong for assistance + * 4n6time MySQL + * Parsers + * Hachoir (meta data) + * OLECF + * OXML + * Symantec AV Log + * timelib StringToDatetime function + * SQLite plugins + * Google Drive + * Windows Registry plugins + * Office MRU + * Outlook + * Terminal Server Client (RDP) + * Typed Paths + * Typed URLs + * USBStor + * Win7 UserAssist + * WinRar +* Patrik Nisen + * For providing input for parsing the DestList stream for the automatic + destinations OLECF plugin +* Francesco Picasso + * Parsers + * PopContest + * SELinux + * SkyDriveLog + * SkyDriveLogErr + * XChatLog + * XChatScrollBack +* Jordi Sanchez + * For providing: + * binplist + * object filter +* Elizabeth Schweinsberg + * Parsers + * McAfee AV Access Protection Log + * Windows Registry plugins + * MSIE zones +* Marc Séguin + * Windows Registry plugins + * CCleaner +* Keith Wall + * SQLite plugins + * Android calls database + * Android sms database + * updates to the timezone transformation + +Test data: + +Copied with permission from the GRR project: https://github.com/google/grr +* History +* index.dat +* places.sqlite + +Copied with permission granted by Jerome Marty. +* WUAUCLT.EXE-830BCC14.pf + +Copied with permission granted by Rob Lee. +Copyright SANS Institute - Digital Forensics and Incident Response. +* 1b4dd67f29cb1962.automaticDestinations-ms +* 5afe4de1b92fc382.customDestinations-ms +* example.lnk +* SysEvent.Evt +* System.evtx +* Ntuser.dat (multiple instances) +* Windows.edb diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..668de65 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,27 @@ +# Names should be added to this file with this pattern: +# +# For individuals: +# Name (email address) +# +# For organizations: +# Organization (fnmatch pattern) +# +# See python fnmatch module documentation for more information. + +Google Inc.
(*@google.com) +Kristinn Gudjonsson (kiddi@kiddaland.net) +Joachim Metz (joachim.metz@gmail.com) +Brian Baskin (brian@thebaskins.com) +David Nides (david.nides@gmail.com) +Dominique Kilman (lexistar97@gmail.com) +Elizabeth Schweinsberg (beth@bethlogic.net) +Eric Mak (ericmak@gmail.com) +Francesco Picasso (francesco.picasso@gmail.com) +Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk) +Keith Wall (kwallster@gmail.com) +Marc Seguin (segumarc@gmail.com) +Oliver Jensen (ojensen5115@gmail.com) +Petter Bjelland (petter.bjelland@gmail.com) +Ashley Holtz (ashley.a.holtz@gmail.com) +Stefan Swerk (stefan_rubanetra@swerk.priv.at) +Preston Miller (preston.miller@dpmforensics.com) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..14e6a3a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +include ACKNOWLEDGEMENTS AUTHORS LICENSE README +include run_tests.py +include utils/check_dependencies.py +exclude .gitignore +exclude *.pyc +recursive-include config * +recursive-include extra * +recursive-include plaso *.proto +recursive-exclude plaso *.pyc +recursive-include test_data * diff --git a/README b/README new file mode 100644 index 0000000..09c037a --- /dev/null +++ b/README @@ -0,0 +1,31 @@ +plaso (Plaso Langar Að Safna Öllu) - super timeline all the things + +In short, plaso is a Python-based backend engine for the tool log2timeline. + +A longer version: + +log2timeline is a tool designed to extract timestamps from various files found +on a typical computer system and aggregate them. + +The initial purpose of plaso was to collect all timestamped events of interest +on a computer system and have them aggregated in a single place for computer +forensic analysis (aka Super Timeline). + +However, plaso has become a framework that supports: +* adding new parsers or parsing plug-ins; +* adding new analysis plug-ins; +* writing one-off scripts to automate repetitive tasks in computer forensic + analysis or equivalent. + +And is moving to support: +* adding new general-purpose parsers/plugins that may not have timestamps + associated with them; +* adding more analysis context; +* tagging events; +* allowing a more targeted approach to the collection/parsing.
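
To make the parser-framework model above concrete, here is a minimal sketch of the general shape of a timestamp-extracting parser: scan an input, recognize timestamped records, and yield them as events that an aggregation step can later merge into a single timeline. This is an illustration only; the class and method names (LineParser, Parse) are assumed for the example and are not plaso's actual parser interface.

    import datetime

    # Hypothetical sketch: plaso's real parser interface is not shown in this
    # commit, so these names are illustrative assumptions, not the project's API.
    class LineParser(object):
        """Toy parser: turns timestamped log lines into (timestamp, message) events."""

        NAME = 'toy_line_parser'

        def Parse(self, lines):
            """Yields a (datetime, message) tuple per line that carries a timestamp."""
            for line in lines:
                stamp, _, message = line.partition(' ')
                try:
                    # Expects lines shaped like: "2013-10-19T09:15:00 session opened".
                    timestamp = datetime.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
                except ValueError:
                    continue  # Not a timestamped record; skip it.
                yield timestamp, message.strip()

    if __name__ == '__main__':
        log = ['2013-10-19T09:15:00 session opened', 'noise without a timestamp']
        for timestamp, message in LineParser().Parse(log):
            print('%s %s' % (timestamp, message))

Running the example prints the one recognized event and silently skips the line without a timestamp.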
+ +Also see: +* log2timeline: http://plaso.kiddaland.net/usage/log2timeline/ +* Project documentation: http://plaso.kiddaland.net/ +* Downloads: https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..e993510 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +# plaso (Plaso Langar Að Safna Öllu) # +*super timeline all the things* + +Various statistics for the tool: + +Code Coverage: [![Coverage +Status](https://img.shields.io/coveralls/log2timeline/plaso.svg)](https://coveralls.io/r/log2timeline/plaso?branch=master) + +Build Status: [![Build +Status](https://travis-ci.org/log2timeline/plaso.svg?branch=master)](https://travis-ci.org/log2timeline/plaso) + +In short, plaso is a Python-based backend engine for the tool [log2timeline](http://plaso.kiddaland.net "Plaso home of the super timeline"). + +## A longer version ## + +log2timeline is a tool designed to extract timestamps from various files found on a typical computer system and aggregate them. + +The initial purpose of plaso was to collect all timestamped events of interest on a computer system and have them aggregated in a single place for computer forensic analysis (aka Super Timeline). + +However, plaso has become a framework that supports: +* adding new parsers or parsing plug-ins; +* adding new analysis plug-ins; +* writing one-off scripts to automate repetitive tasks in computer forensic analysis or equivalent. + +And is moving to support: +* adding new general-purpose parsers/plugins that may not have timestamps associated with them; +* adding more analysis context; +* tagging events; +* allowing a more targeted approach to the collection/parsing. + +Also see: +* [log2timeline](http://plaso.kiddaland.net/usage/log2timeline/ "Usage for log2timeline") +* [Project documentation](http://plaso.kiddaland.net/ "Tool's main documentation site") +* [Downloads](https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/ "Download the latest version") + diff --git a/config/dpkg/debian/changelog b/config/dpkg/debian/changelog new file mode 100644 index 0000000..4c93d0c --- /dev/null +++ b/config/dpkg/debian/changelog @@ -0,0 +1,23 @@ +python-plaso (1.1.0-1) unstable; urgency=low + + * Version 1.1.0 development release. + + -- Log2Timeline Sat, 14 Dec 2013 12:15:00 +0100 + +python-plaso (1.0.2-2) unstable; urgency=low + + * Version 1.0.2 alpha release. + + -- Log2Timeline Mon, 28 Oct 2013 12:20:23 -0700 + +python-plaso (1.0.2-1) unstable; urgency=low + + * Version 1.0.2 RC1 release. + + -- Log2Timeline Sat, 19 Oct 2013 09:15:00 +0200 + +python-plaso (1.0-1) unstable; urgency=low + + * Initial release.
+ + -- Log2Timeline Sat, 8 Dec 2012 09:15:00 +0200 diff --git a/config/dpkg/debian/compat b/config/dpkg/debian/compat new file mode 100644 index 0000000..7f8f011 --- /dev/null +++ b/config/dpkg/debian/compat @@ -0,0 +1 @@ +7 diff --git a/config/dpkg/debian/control b/config/dpkg/debian/control new file mode 100644 index 0000000..89130f1 --- /dev/null +++ b/config/dpkg/debian/control @@ -0,0 +1,16 @@ +Source: python-plaso +Section: unknown +Priority: extra +Maintainer: Log2Timeline +Build-Depends: debhelper (>= 7.0.0), python, python-setuptools +Standards-Version: 3.9.2 +Homepage: https://github.com/log2timeline/plaso/ + +Package: python-plaso +Architecture: all +Depends: binplist, libprotobuf7 | libprotobuf8, libyaml-0-2, libbde-python, libesedb-python, libevt-python, libevtx-python, libewf-python, libfwsi-python, liblnk-python, libmsiecf-python, libolecf-python, libqcow-python, libregf-python, libtsk, libsmdev-python, libsmraw-python, libvhdi-python, libvmdk-python, libvshadow-python, ipython, python-bencode, python-construct, python-dateutil, python-dfvfs, python-dpkt, python-hachoir-core, python-hachoir-metadata, python-hachoir-parser, python-protobuf, python-psutil, python-pyparsing, python-six, python-yaml, python-tz, pytsk3, ${shlibs:Depends}, ${misc:Depends} +Recommends: elasticsearch, libesedb-tools, libbde-tools, libevt-tools, libevtx-tools, libewf-tools, liblnk-tools, libmsiecf-tools, libolecf-tools, libqcow-tools, libregf-tools, libsmdev-tools, libsmraw-tools, libvhdi-tools, libvmdk-tools, libvshadow-tools, libtsk-dev, pyelasticsearch, sleuthkit +Description: Plaso Log2Timeline + Log2Timeline is a framework to create super timelines. + It is a framework to parse various files and collect time-based + digital artifacts that can be used in computer forensics. diff --git a/config/dpkg/debian/copyright b/config/dpkg/debian/copyright new file mode 100644 index 0000000..9da6618 --- /dev/null +++ b/config/dpkg/debian/copyright @@ -0,0 +1,27 @@ +Format: http://dep.debian.net/deps/dep5 +Upstream-Name: plaso +Source: https://github.com/log2timeline/plaso/ + +Files: * +Copyright: 2012 The Plaso Project Authors. +License: Apache-2.0 + +Files: debian/* +Copyright: 2012 The Plaso Project Authors. +License: Apache-2.0 + +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the complete text of the Apache version 2.0 license + can be found in "/usr/share/common-licenses/Apache-2.0". diff --git a/config/dpkg/debian/python-plaso.docs b/config/dpkg/debian/python-plaso.docs new file mode 100644 index 0000000..7aab282 --- /dev/null +++ b/config/dpkg/debian/python-plaso.docs @@ -0,0 +1,4 @@ +ACKNOWLEDGEMENTS +AUTHORS +LICENSE +README diff --git a/config/dpkg/debian/rules b/config/dpkg/debian/rules new file mode 100755 index 0000000..e7fd152 --- /dev/null +++ b/config/dpkg/debian/rules @@ -0,0 +1,45 @@ +#!/usr/bin/make -f +# debian/rules that uses debhelper >= 7. + +# Uncomment this to turn on verbose mode. 
+#export DH_VERBOSE=1 + +# This has to be exported to make some magic below work. +export DH_OPTIONS + + +%: + dh $@ + +override_dh_auto_clean: + +override_dh_auto_test: + +override_dh_installmenu: + +override_dh_installmime: + +override_dh_installmodules: + +override_dh_installlogcheck: + +override_dh_installlogrotate: + +override_dh_installpam: + +override_dh_installppp: + +override_dh_installudev: + +override_dh_installwm: + +override_dh_installxfonts: + +override_dh_gconf: + +override_dh_icons: + +override_dh_perl: + +override_dh_pysupport: + diff --git a/config/licenses/LICENSE.PyYAML b/config/licenses/LICENSE.PyYAML new file mode 100644 index 0000000..312c1a1 --- /dev/null +++ b/config/licenses/LICENSE.PyYAML @@ -0,0 +1,20 @@ +Copyright (c) 2006 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/config/licenses/LICENSE.bencode b/config/licenses/LICENSE.bencode new file mode 100644 index 0000000..4b7a674 --- /dev/null +++ b/config/licenses/LICENSE.bencode @@ -0,0 +1,143 @@ +BitTorrent Open Source License + +Version 1.1 + +This BitTorrent Open Source License (the "License") applies to the BitTorrent client and related software products as well as any updates or maintenance releases of that software ("BitTorrent Products") that are distributed by BitTorrent, Inc. ("Licensor"). Any BitTorrent Product licensed pursuant to this License is a Licensed Product. Licensed Product, in its entirety, is protected by U.S. copyright law. This License identifies the terms under which you may use, copy, distribute or modify Licensed Product. + +Preamble + +This Preamble is intended to describe, in plain English, the nature and scope of this License. However, this Preamble is not a part of this license. The legal effect of this License is dependent only upon the terms of the License and not this Preamble. + +This License complies with the Open Source Definition and is derived from the Jabber Open Source License 1.0 (the "JOSL"), which has been approved by Open Source Initiative. Sections 4(c) and 4(f)(iii) from the JOSL have been deleted. + +This License provides that: + +1. You may use or give away the Licensed Product, alone or as a component of an aggregate software distribution containing programs from several different sources. No royalty or other fee is required. + +2. Both Source Code and executable versions of the Licensed Product, including Modifications made by previous Contributors, are available for your use. 
(The terms "Licensed Product," "Modifications," "Contributors" and "Source Code" are defined in the License.) + +3. You are allowed to make Modifications to the Licensed Product, and you can create Derivative Works from it. (The term "Derivative Works" is defined in the License.) + +4. By accepting the Licensed Product under the provisions of this License, you agree that any Modifications you make to the Licensed Product and then distribute are governed by the provisions of this License. In particular, you must make the Source Code of your Modifications available to others free of charge and without a royalty. + +5. You may sell, accept donations or otherwise receive compensation for executable versions of a Licensed Product, without paying a royalty or other fee to the Licensor or any Contributor, provided that such executable versions contain your or another Contributor's material Modifications. For the avoidance of doubt, to the extent your executable version of a Licensed Product does not contain your or another Contributor's material Modifications, you may not sell, accept donations or otherwise receive compensation for such executable. + +You may use the Licensed Product for any purpose, but the Licensor is not providing you any warranty whatsoever, nor is the Licensor accepting any liability in the event that the Licensed Product doesn't work properly or causes you any injury or damages. + +6. If you sublicense the Licensed Product or Derivative Works, you may charge fees for warranty or support, or for accepting indemnity or liability obligations to your customers. You cannot charge for, sell, accept donations or otherwise receive compensation for the Source Code. + +7. If you assert any patent claims against the Licensor relating to the Licensed Product, or if you breach any terms of the License, your rights to the Licensed Product under this License automatically terminate. + +You may use this License to distribute your own Derivative Works, in which case the provisions of this License will apply to your Derivative Works just as they do to the original Licensed Product. + +Alternatively, you may distribute your Derivative Works under any other OSI-approved Open Source license, or under a proprietary license of your choice. If you use any license other than this License, however, you must continue to fulfill the requirements of this License (including the provisions relating to publishing the Source Code) for those portions of your Derivative Works that consist of the Licensed Product, including the files containing Modifications. + +New versions of this License may be published from time to time in connection with new versions of a Licensed Product or otherwise. You may choose to continue to use the license terms in this version of the License for the Licensed Product that was originally licensed hereunder, however, the new versions of this License will at all times apply to new versions of the Licensed Product released by Licensor after the release of the new version of this License. Only the Licensor has the right to change the License terms as they apply to the Licensed Product. + +This License relies on precise definitions for certain terms. Those terms are defined when they are first used, and the definitions are repeated for your convenience in a Glossary at the end of the License. + +License Terms + +1. Grant of License From Licensor.
Subject to the terms and conditions of this License, Licensor hereby grants you a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims, to do the following: + +a. Use, reproduce, modify, display, perform, sublicense and distribute any Modifications created by a Contributor or portions thereof, in both Source Code or as an executable program, either on an unmodified basis or as part of Derivative Works. + +b. Under claims of patents now or hereafter owned or controlled by Contributor, to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof, but solely to the extent that any such claim is necessary to enable you to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof or Derivative Works thereof. + +2. Grant of License to Modifications From Contributor. "Modifications" means any additions to or deletions from the substance or structure of (i) a file containing a Licensed Product, or (ii) any new file that contains any part of a Licensed Product. Hereinafter in this License, the term "Licensed Product" shall include all previous Modifications that you receive from any Contributor. Subject to the terms and conditions of this License, by application of the provisions in Section 4(a) below, each person or entity who created or contributed to the creation of, and distributed, a Modification (a "Contributor") hereby grants you a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims, to do the following: + +a. Use, reproduce, modify, display, perform, sublicense and distribute any Modifications created by such Contributor or portions thereof, in both Source Code or as an executable program, either on an unmodified basis or as part of Derivative Works. + +b. Under claims of patents now or hereafter owned or controlled by Contributor, to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof, but solely to the extent that any such claim is necessary to enable you to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof or Derivative Works thereof. + +3. Exclusions From License Grant. Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor or any Contributor except as expressly stated herein. No patent license is granted separate from the Licensed Product, for code that you delete from the Licensed Product, or for combinations of the Licensed Product with other software or hardware. No right is granted to the trademarks of Licensor or any Contributor even if such marks are included in the Licensed Product. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any code that Licensor otherwise would have a right to license. As an express condition for your use of the Licensed Product, you hereby agree that you will not, without the prior written consent of Licensor, use any trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor or any Contributor except as expressly stated herein.
For the avoidance of doubt and without limiting the foregoing, you hereby agree that you will not use or display any trademark of Licensor or any Contributor in any domain name, directory filepath, advertisement, link or other reference to you in any manner or in any media. + +4. Your Obligations Regarding Distribution. + +a. Application of This License to Your Modifications. As an express condition for your use of the Licensed Product, you hereby agree that any Modifications that you create or to which you contribute, and which you distribute, are governed by the terms of this License including, without limitation, Section 2. Any Modifications that you create or to which you contribute may be distributed only under the terms of this License or a future version of this License released under Section 7. You must include a copy of this License with every copy of the Modifications you distribute. You agree not to offer or impose any terms on any Source Code or executable version of the Licensed Product or Modifications that alter or restrict the applicable version of this License or the recipients' rights hereunder. However, you may include an additional document offering the additional rights described in Section 4(d). + +b. Availability of Source Code. You must make available, without charge, under the terms of this License, the Source Code of the Licensed Product and any Modifications that you distribute, either on the same media as you distribute any executable or other form of the Licensed Product, or via a mechanism generally accepted in the software development community for the electronic transfer of data (an "Electronic Distribution Mechanism"). The Source Code for any version of Licensed Product or Modifications that you distribute must remain available for as long as any executable or other form of the Licensed Product is distributed by you. You are responsible for ensuring that the Source Code version remains available even if the Electronic Distribution Mechanism is maintained by a third party. + +c. Intellectual Property Matters. + + i. Third Party Claims. If you have knowledge that a license to a third party's intellectual property right is required to exercise the rights granted by this License, you must include a text file with the Source Code distribution titled "LEGAL" that describes the claim and the party making the claim in sufficient detail that a recipient will know whom to contact. If you obtain such knowledge after you make any Modifications available as described in Section 4(b), you shall promptly modify the LEGAL file in all copies you make available thereafter and shall take other steps (such as notifying appropriate mailing lists or newsgroups) reasonably calculated to inform those who received the Licensed Product from you that new knowledge has been obtained. + + ii. Contributor APIs. If your Modifications include an application programming interface ("API") and you have knowledge of patent licenses that are reasonably necessary to implement that API, you must also include this information in the LEGAL file. + + iii. Representations. You represent that, except as disclosed pursuant to 4(c)(i) above, you believe that any Modifications you distribute are your original creations and that you have sufficient rights to grant the rights conveyed by this License. + +d. Required Notices. 
You must duplicate this License in any documentation you provide along with the Source Code of any Modifications you create or to which you contribute, and which you distribute, wherever you describe recipients' rights relating to Licensed Product. You must duplicate the notice contained in Exhibit A (the "Notice") in each file of the Source Code of any copy you distribute of the Licensed Product. If you created a Modification, you may add your name as a Contributor to the Notice. If it is not possible to put the Notice in a particular Source Code file due to its structure, then you must include such Notice in a location (such as a relevant directory file) where a user would be likely to look for such a notice. You may choose to offer, and charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Licensed Product. However, you may do so only on your own behalf, and not on behalf of the Licensor or any Contributor. You must make it clear that any such warranty, support, indemnity or liability obligation is offered by you alone, and you hereby agree to indemnify the Licensor and every Contributor for any liability incurred by the Licensor or such Contributor as a result of warranty, support, indemnity or liability terms you offer. + +e. Distribution of Executable Versions. You may distribute Licensed Product as an executable program under a license of your choice that may contain terms different from this License provided (i) you have satisfied the requirements of Sections 4(a) through 4(d) for that distribution, (ii) you include a conspicuous notice in the executable version, related documentation and collateral materials stating that the Source Code version of the +Licensed Product is available under the terms of this License, including a description of how and where you have fulfilled the obligations of Section 4(b), and (iii) you make it clear that any terms that differ from this License are offered by you alone, not by Licensor or any Contributor. You hereby agree to indemnify the Licensor and every Contributor for any liability incurred by Licensor or such Contributor as a result of any terms you offer. + +f. Distribution of Derivative Works. You may create Derivative Works (e.g., combinations of some or all of the Licensed Product with other code) and distribute the Derivative Works as products under any other license you select, with the proviso that the requirements of this License are fulfilled for those portions of the Derivative Works that consist of the Licensed Product or any Modifications thereto. + +g. Compensation for Distribution of Executable Versions of Licensed Products, Modifications or Derivative Works. Notwithstanding any provision of this License to the contrary, by distributing, selling, licensing, sublicensing or otherwise making available any Licensed Product, or Modification or Derivative Work thereof, you and Licensor hereby acknowledge and agree that you may sell, license or sublicense for a fee, accept donations or otherwise receive compensation for executable versions of a Licensed Product, without paying a royalty or other fee to the Licensor or any other Contributor, provided that such executable versions (i) contain your or another Contributor's material Modifications, or (ii) are otherwise material Derivative Works.
For purposes of this License, an executable version of the Licensed Product will be deemed to contain a material Modification, or will otherwise be deemed a material Derivative Work, if (a) the Licensed Product is modified with your own or a third party's software programs or other code, and/or the Licensed Product is combined with a number of your own or a third party's software programs or code, respectively, and (b) such software programs or code add or contribute material value, functionality or features to the Licensed Product. For the avoidance of doubt, to the extent your executable version of a Licensed Product does not contain your or another Contributor's material Modifications or is otherwise not a material Derivative Work, in each case as contemplated herein, you may not sell, license or sublicense for a fee, accept donations or otherwise receive compensation for such executable. Additionally, without limitation of the foregoing and notwithstanding any provision of this License to the contrary, you cannot charge for, sell, license or sublicense for a fee, accept donations or otherwise receive compensation for the Source Code. + +5. Inability to Comply Due to Statute or Regulation. If it is impossible for you to comply with any of the terms of this License with respect to some or all of the Licensed Product due to statute, judicial order, or regulation, then you must (i) comply with the terms of this License to the maximum extent possible, (ii) cite the statute or regulation that prohibits you from adhering to the License, and (iii) describe the limitations and the code they affect. Such description must be included in the LEGAL file described in Section 4(d), and must be included with all distributions of the Source Code. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill at computer programming to be able to understand it. + +6. Application of This License. This License applies to code to which Licensor or Contributor has attached the Notice in Exhibit A, which is incorporated herein by this reference. + +7. Versions of This License. + +a. New Versions. Licensor may publish from time to time revised and/or new versions of the License. + +b. Effect of New Versions. Once Licensed Product has been published under a particular version of the License, you may always continue to use it under the terms of that version, provided that any such license be in full force and effect at the time, and has not been revoked or otherwise terminated. You may also choose to use such Licensed Product under the terms of any subsequent version (but not any prior version) of the License published by Licensor. No one other than Licensor has the right to modify the terms applicable to Licensed Product created under this License. + +c. Derivative Works of this License. If you create or use a modified version of this License, which you may do only in order to apply it to software that is not already a Licensed Product under this License, you must rename your license so that it is not confusingly similar to this License, and must make it clear that your license contains terms that differ from this License. In so naming your license, you may not use any trademark of Licensor or any Contributor. + +8. Disclaimer of Warranty.
LICENSED PRODUCT IS PROVIDED UNDER THIS LICENSE ON AN AS IS BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE LICENSED PRODUCT IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LICENSED PRODUCT IS WITH YOU. SHOULD LICENSED PRODUCT PROVE DEFECTIVE IN ANY RESPECT, YOU (AND NOT THE LICENSOR OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS +DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF LICENSED PRODUCT IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +9. Termination. + +a. Automatic Termination Upon Breach. This license and the rights granted hereunder will terminate automatically if you fail to comply with the terms herein and fail to cure such breach within ten (10) days of being notified of the breach by the Licensor. For purposes of this provision, proof of delivery via email to the address listed in the "WHOIS" database of the registrar for any website through which you distribute or market any Licensed Product, or to any alternate email address which you designate in writing to the Licensor, shall constitute sufficient notification. All sublicenses to the Licensed Product that are properly granted shall survive any termination of this license so long as they continue to comply with the terms of this License. Provisions that, by their nature, must remain in effect beyond the termination of this License, shall survive. + +b. Termination Upon Assertion of Patent Infringement. If you initiate litigation by asserting a patent infringement claim (excluding declaratory judgment actions) against Licensor or a Contributor (Licensor or Contributor against whom you file such an action is referred to herein as Respondent) alleging that Licensed Product directly or indirectly infringes any patent, then any and all rights granted by such Respondent to you under Sections 1 or 2 of this License shall terminate prospectively upon sixty (60) days notice from Respondent (the "Notice Period") unless within that Notice Period you either agree in writing (i) to pay Respondent a mutually agreeable reasonable royalty for your past or future use of Licensed Product made by such Respondent, or (ii) withdraw your litigation claim with respect to Licensed Product against such Respondent. If within said Notice Period a reasonable royalty and payment arrangement are not mutually agreed upon in writing by the parties or the litigation claim is not withdrawn, the rights granted by Licensor to you under Sections 1 and 2 automatically terminate at the expiration of said Notice Period. + +c. Reasonable Value of This License. If you assert a patent infringement claim against Respondent alleging that Licensed Product directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by said Respondent under Sections 1 and 2 shall be taken into account in determining the amount or value of any payment or license. + +d. No Retroactive Effect of Termination. In the event of termination under Sections 9(a) or 9(b) above, all end user license agreements (excluding licenses to distributors and resellers) that have been validly granted by you or any distributor hereunder prior to termination shall survive termination. + +10.
Limitation of Liability. UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL THE LICENSOR, ANY CONTRIBUTOR, OR ANY DISTRIBUTOR OF LICENSED PRODUCT, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +11. Responsibility for Claims. As between Licensor and Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License. You agree to work with Licensor and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + +12. U.S. Government End Users. The Licensed Product is a commercial item, as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software and commercial computer software documentation, as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Licensed Product with only those rights set forth herein. + +13. Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by California law provisions (except to the extent applicable law, if any, provides otherwise), excluding its conflict-of-law provisions. You expressly agree that in any litigation relating to this license the losing party shall be responsible for costs including, without limitation, court costs and reasonable attorneys fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation that provides that the language of a contract shall be construed against the drafter shall not apply to this License. + +14. Definition of You in This License. You throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License or a future version of this License issued under Section 7. For legal entities, you includes any entity that controls, is controlled by, is under common control with, or affiliated with, you. For purposes of this definition, control means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
You are responsible for advising any affiliated entity of the terms of this License, and that any rights or privileges derived from or obtained by way of this License are subject to the restrictions outlined herein. + +15. Glossary. All defined terms in this License that are used in more than one Section of this License are repeated here, in alphabetical order, for the convenience of the reader. The Section of this License in which each defined term is first used is shown in parentheses. + +Contributor: Each person or entity who created or contributed to the creation of, and distributed, a Modification. (See Section 2) + +Derivative Works: That term as used in this License is defined under U.S. copyright law. (See Section 1(b)) + +License: This BitTorrent Open Source License. (See first paragraph of License) + +Licensed Product: Any BitTorrent Product licensed pursuant to this License. The term "Licensed Product" includes all previous Modifications from any Contributor that you receive. (See first paragraph of License and Section 2) + +Licensor: BitTorrent, Inc. (See first paragraph of License) + +Modifications: Any additions to or deletions from the substance or structure of (i) a file containing Licensed Product, or (ii) any new file that contains any part of Licensed Product. (See Section 2) + +Notice: The notice contained in Exhibit A. (See Section 4(e)) + +Source Code: The preferred form for making modifications to the Licensed Product, including all modules contained therein, plus any associated interface definition files, scripts used to control compilation and installation of an executable program, or a list of differential comparisons against the Source Code of the Licensed Product. (See Section 1(a)) + +You: This term is defined in Section 14 of this License. + + +EXHIBIT A + +The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product or any Modifications hereto. Contributors to any Modifications may add their own copyright notices to identify their own contributions. + +License: + +The contents of this file are subject to the BitTorrent Open Source License Version 1.0 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License at http://www.bittorrent.com/license/. + +Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. + diff --git a/config/licenses/LICENSE.binplist b/config/licenses/LICENSE.binplist new file mode 100644 index 0000000..f60dd92 --- /dev/null +++ b/config/licenses/LICENSE.binplist @@ -0,0 +1,13 @@ +Copyright 2013 Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.
diff --git a/config/licenses/LICENSE.construct b/config/licenses/LICENSE.construct new file mode 100644 index 0000000..a3c7898 --- /dev/null +++ b/config/licenses/LICENSE.construct @@ -0,0 +1,21 @@ +Copyright (C) 2006-2013 + Tomer Filiba (tomerfiliba@gmail.com) + Corbin Simpson (MostAwesomeDude@gmail.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/config/licenses/LICENSE.dateutil-parser b/config/licenses/LICENSE.dateutil-parser new file mode 100644 index 0000000..f08c6ad --- /dev/null +++ b/config/licenses/LICENSE.dateutil-parser @@ -0,0 +1,29 @@ +dateutil - Extensions to the standard python 2.3+ datetime module. + +Copyright (c) 2003-2011 - Gustavo Niemeyer + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/config/licenses/LICENSE.dfvfs b/config/licenses/LICENSE.dfvfs new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/config/licenses/LICENSE.dfvfs @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/config/licenses/LICENSE.dpkt b/config/licenses/LICENSE.dpkt new file mode 100644 index 0000000..99d1437 --- /dev/null +++ b/config/licenses/LICENSE.dpkt @@ -0,0 +1,28 @@ + + Copyright (c) 2004 Dug Song + All rights reserved, all wrongs reversed. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The names of the authors and copyright holders may not be used to + endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/config/licenses/LICENSE.ipython b/config/licenses/LICENSE.ipython new file mode 100644 index 0000000..64205fe --- /dev/null +++ b/config/licenses/LICENSE.ipython @@ -0,0 +1,85 @@ +============================= + The IPython licensing terms +============================= + +IPython is licensed under the terms of the Modified BSD License (also known as +New or Revised BSD), as follows: + +Copyright (c) 2008-2010, IPython Development Team +Copyright (c) 2001-2007, Fernando Perez. +Copyright (c) 2001, Janko Hauser +Copyright (c) 2001, Nathaniel Gray + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +Neither the name of the IPython Development Team nor the names of its +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +About the IPython Development Team +---------------------------------- + +Fernando Perez began IPython in 2001 based on code from Janko Hauser + and Nathaniel Gray . Fernando is still +the project lead. + +The IPython Development Team is the set of all contributors to the IPython +project. This includes all of the IPython subprojects. A full list with +details is kept in the documentation directory, in the file +``about/credits.txt``. + +The core team that coordinates development on GitHub can be found here: +http://github.com/ipython. As of late 2010, it consists of: + +* Brian E. Granger +* Jonathan March +* Evan Patterson +* Fernando Perez +* Min Ragan-Kelley +* Robert Kern + + +Our Copyright Policy +-------------------- + +IPython uses a shared copyright model. Each contributor maintains copyright +over their contributions to IPython. But, it is important to note that these +contributions are typically only changes to the repositories. Thus, the IPython +source code, in its entirety is not the copyright of any single person or +institution. Instead, it is the collective copyright of the entire IPython +Development Team. If individual contributors want to maintain a record of what +changes/contributions they have specific copyright on, they should indicate +their copyright in the commit message of the change, when they commit the +change to one of the IPython repositories. + +With this in mind, the following banner should be used in any source code file +to indicate the copyright and license terms: + +#----------------------------------------------------------------------------- +# Copyright (c) 2010, IPython Development Team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file COPYING.txt, distributed with this software. +#----------------------------------------------------------------------------- diff --git a/config/licenses/LICENSE.libbde b/config/licenses/LICENSE.libbde new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libbde @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. 
+Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. 
+ + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
+ diff --git a/config/licenses/LICENSE.libesedb b/config/licenses/LICENSE.libesedb new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libesedb @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. 
+ + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. 
If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libevt b/config/licenses/LICENSE.libevt new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libevt @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libevtx b/config/licenses/LICENSE.libevtx new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libevtx @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libewf b/config/licenses/LICENSE.libewf new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libewf @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. 
+ + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libfwsi b/config/licenses/LICENSE.libfwsi new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libfwsi @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. 
Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.liblnk b/config/licenses/LICENSE.liblnk new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.liblnk @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libmsiecf b/config/licenses/LICENSE.libmsiecf new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libmsiecf @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libolecf b/config/licenses/LICENSE.libolecf new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libolecf @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. 
+ + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libqcow b/config/licenses/LICENSE.libqcow new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libqcow @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. 
Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libregf b/config/licenses/LICENSE.libregf new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libregf @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libsmdev b/config/licenses/LICENSE.libsmdev new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libsmdev @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libsmraw b/config/licenses/LICENSE.libsmraw new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libsmraw @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. 
+ + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libvhdi b/config/licenses/LICENSE.libvhdi new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libvhdi @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. 
Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libvmdk b/config/licenses/LICENSE.libvmdk new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libvmdk @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/licenses/LICENSE.libvshadow b/config/licenses/LICENSE.libvshadow new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.libvshadow @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
+ diff --git a/config/licenses/LICENSE.libyaml b/config/licenses/LICENSE.libyaml new file mode 100644 index 0000000..050ced2 --- /dev/null +++ b/config/licenses/LICENSE.libyaml @@ -0,0 +1,19 @@ +Copyright (c) 2006 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/config/licenses/LICENSE.protobuf b/config/licenses/LICENSE.protobuf new file mode 100644 index 0000000..5c2b52f --- /dev/null +++ b/config/licenses/LICENSE.protobuf @@ -0,0 +1,33 @@ +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. +* Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. diff --git a/config/licenses/LICENSE.psutil b/config/licenses/LICENSE.psutil new file mode 100644 index 0000000..e91b135 --- /dev/null +++ b/config/licenses/LICENSE.psutil @@ -0,0 +1,27 @@ +psutil is distributed under BSD license reproduced below. + +Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola' +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the psutil authors nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/config/licenses/LICENSE.pyelasticsearch b/config/licenses/LICENSE.pyelasticsearch new file mode 100644 index 0000000..11b538e --- /dev/null +++ b/config/licenses/LICENSE.pyelasticsearch @@ -0,0 +1,27 @@ +Copyright (c) 2010 Robert Eanes and contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of pyelasticsearch nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/config/licenses/LICENSE.pyinstaller b/config/licenses/LICENSE.pyinstaller new file mode 100644 index 0000000..5e36454 --- /dev/null +++ b/config/licenses/LICENSE.pyinstaller @@ -0,0 +1,364 @@ +================================ + The PyInstaller licensing terms +================================ + + +Copyright (c) 2010-2013, PyInstaller Development Team +Copyright (c) 2005-2009, Giovanni Bajo +Based on previous work under copyright (c) 2002 McMillan Enterprises, Inc. + + +PyInstaller is licensed under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + + +Bootloader Exception +-------------------- + +In addition to the permissions in the GNU General Public License, the +authors give you unlimited permission to link or embed compiled bootloader +and related files into combinations with other programs, and to distribute +those combinations without any restriction coming from the use of those +files. (The General Public License restrictions do apply in other respects; +for example, they cover modification of the files, and distribution when +not linked into a combine executable.) + + +Bootloader and Related Files +---------------------------- + +Bootloader and related files are files which are embedded within the +final executable. This includes files in directories: + +./bootloader/ +./PyInstaller/loader + + +About the PyInstaller Development Team +-------------------------------------- + +The PyInstaller Development Team is the set of contributors +to the PyInstaller project. A full list with details is kept +in the documentation directory, in the file +``doc/credits.txt``. + +The core team that coordinates development on GitHub can be found here: +https://github.com/pyinstaller/pyinstaller. As of 2013, it consists of: + +* Giovanni Bajo +* Hartmut Goebel +* Martin Zibricky + + +Our Copyright Policy +-------------------- + +PyInstaller uses a shared copyright model. Each contributor maintains copyright +over their contributions to PyInstaller. But, it is important to note that these +contributions are typically only changes to the repositories. Thus, +the PyInstaller source code, in its entirety is not the copyright of any single +person or institution. Instead, it is the collective copyright of the entire +PyInstaller Development Team. If individual contributors want to maintain +a record of what changes/contributions they have specific copyright on, they +should indicate their copyright in the commit message of the change, when they +commit the change to the PyInstaller repository. + +With this in mind, the following banner should be used in any source code file +to indicate the copyright and license terms: + + +#----------------------------------------------------------------------------- +# Copyright (c) 2013, PyInstaller Development Team. +# +# Distributed under the terms of the GNU General Public License with exception +# for distributing bootloader. +# +# The full license is in the file COPYING.txt, distributed with this software. +#----------------------------------------------------------------------------- + + + +GNU General Public License +-------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". 
+ +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS diff --git a/config/licenses/LICENSE.pyparsing b/config/licenses/LICENSE.pyparsing new file mode 100644 index 0000000..bbc959e --- /dev/null +++ b/config/licenses/LICENSE.pyparsing @@ -0,0 +1,18 @@ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/config/licenses/LICENSE.pysqlite b/config/licenses/LICENSE.pysqlite new file mode 100644 index 0000000..793691b --- /dev/null +++ b/config/licenses/LICENSE.pysqlite @@ -0,0 +1,19 @@ +Copyright (c) 2004-2013 Gerhard Häring + +This software is provided 'as-is', without any express or implied warranty. In +no event will the authors be held liable for any damages arising from the use +of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software in + a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. 
This notice may not be removed or altered from any source distribution. diff --git a/config/licenses/LICENSE.python b/config/licenses/LICENSE.python new file mode 100644 index 0000000..7b519a9 --- /dev/null +++ b/config/licenses/LICENSE.python @@ -0,0 +1,945 @@ +.. highlightlang:: none + +.. _history-and-license: + +******************* +History and License +******************* + + +History of the software +======================= + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl/) in the Netherlands as a +successor of a language called ABC. Guido remains Python's principal author, +although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for National +Research Initiatives (CNRI, see http://www.cnri.reston.va.us/) in Reston, +Virginia where he released several versions of the software. + +In May 2000, Guido and the Python core development team moved to BeOpen.com to +form the BeOpen PythonLabs team. In October of the same year, the PythonLabs +team moved to Digital Creations (now Zope Corporation; see +http://www.zope.com/). In 2001, the Python Software Foundation (PSF, see +http://www.python.org/psf/) was formed, a non-profit organization created +specifically to own Python-related Intellectual Property. Zope Corporation is a +sponsoring member of the PSF. + +All Python releases are Open Source (see http://www.opensource.org/ for the Open +Source Definition). Historically, most, but not all, Python releases have also +been GPL-compatible; the table below summarizes the various releases. + ++----------------+--------------+-----------+------------+-----------------+ +| Release | Derived from | Year | Owner | GPL compatible? 
| ++================+==============+===========+============+=================+ +| 0.9.0 thru 1.2 | n/a | 1991-1995 | CWI | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 1.3 thru 1.5.2 | 1.2 | 1995-1999 | CNRI | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 1.6 | 1.5.2 | 2000 | CNRI | no | ++----------------+--------------+-----------+------------+-----------------+ +| 2.0 | 1.6 | 2000 | BeOpen.com | no | ++----------------+--------------+-----------+------------+-----------------+ +| 1.6.1 | 1.6 | 2001 | CNRI | no | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1 | 2.0+1.6.1 | 2001 | PSF | no | ++----------------+--------------+-----------+------------+-----------------+ +| 2.0.1 | 2.0+1.6.1 | 2001 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1.1 | 2.1+2.0.1 | 2001 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2 | 2.1.1 | 2001 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1.2 | 2.1.1 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1.3 | 2.1.2 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2.1 | 2.2 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2.2 | 2.2.1 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2.3 | 2.2.2 | 2002-2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3 | 2.2.2 | 2002-2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.1 | 2.3 | 2002-2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.2 | 2.3.1 | 2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.3 | 2.3.2 | 2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.4 | 2.3.3 | 2004 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.5 | 2.3.4 | 2005 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4 | 2.3 | 2004 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.1 | 2.4 | 2005 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.2 | 2.4.1 | 2005 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.3 | 2.4.2 | 2006 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.4 | 2.4.3 | 2006 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.5 | 2.4 | 2006 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.5.1 | 2.5 | 2007 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.5.2 | 2.5.1 | 2008 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.5.3 | 2.5.2 | 2008 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.6 | 2.5 | 2008 | PSF | yes | 
++----------------+--------------+-----------+------------+-----------------+ +| 2.6.1 | 2.6 | 2008 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.6.2 | 2.6.1 | 2009 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.6.3 | 2.6.2 | 2009 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.6.4 | 2.6.3 | 2010 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.7 | 2.6 | 2010 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ + +.. note:: + + GPL-compatible doesn't mean that we're distributing Python under the GPL. All + Python licenses, unlike the GPL, let you distribute a modified version without + making your changes open source. The GPL-compatible licenses make it possible to + combine Python with other software that is released under the GPL; the others + don't. + +Thanks to the many outside volunteers who have worked under Guido's direction to +make these releases possible. + + +Terms and conditions for accessing or otherwise using Python +============================================================ + + +.. centered:: PSF LICENSE AGREEMENT FOR PYTHON |release| + +#. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and + the Individual or Organization ("Licensee") accessing and otherwise using Python + |release| software in source or binary form and its associated documentation. + +#. Subject to the terms and conditions of this License Agreement, PSF hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python |release| alone or in any derivative + version, provided, however, that PSF's License Agreement and PSF's notice of + copyright, i.e., "Copyright © 2001-2013 Python Software Foundation; All Rights + Reserved" are retained in Python |release| alone or in any derivative version + prepared by Licensee. + +#. In the event Licensee prepares a derivative work that is based on or + incorporates Python |release| or any part thereof, and wants to make the + derivative work available to others as provided herein, then Licensee hereby + agrees to include in any such work a brief summary of the changes made to Python + |release|. + +#. PSF is making Python |release| available to Licensee on an "AS IS" basis. + PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF PYTHON |release| WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +#. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON |release| + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON |release|, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +#. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +#. Nothing in this License Agreement shall be deemed to create any relationship + of agency, partnership, or joint venture between PSF and Licensee. 
This License + Agreement does not grant permission to use PSF trademarks or trade name in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. + +#. By copying, installing or otherwise using Python |release|, Licensee agrees + to be bound by the terms and conditions of this License Agreement. + + +.. centered:: BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 + + +.. centered:: BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +#. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at + 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization + ("Licensee") accessing and otherwise using this software in source or binary + form and its associated documentation ("the Software"). + +#. Subject to the terms and conditions of this BeOpen Python License Agreement, + BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license + to reproduce, analyze, test, perform and/or display publicly, prepare derivative + works, distribute, and otherwise use the Software alone or in any derivative + version, provided, however, that the BeOpen Python License is retained in the + Software, alone or in any derivative version prepared by Licensee. + +#. BeOpen is making the Software available to Licensee on an "AS IS" basis. + BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +#. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, + MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF + ADVISED OF THE POSSIBILITY THEREOF. + +#. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +#. This License Agreement shall be governed by and interpreted in all respects + by the law of the State of California, excluding conflict of law provisions. + Nothing in this License Agreement shall be deemed to create any relationship of + agency, partnership, or joint venture between BeOpen and Licensee. This License + Agreement does not grant permission to use BeOpen trademarks or trade names in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. As an exception, the "BeOpen Python" logos available at + http://www.pythonlabs.com/logos.html may be used according to the permissions + granted on that web page. + +#. By copying, installing or otherwise using the software, Licensee agrees to be + bound by the terms and conditions of this License Agreement. + + +.. centered:: CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 + +#. This LICENSE AGREEMENT is between the Corporation for National Research + Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 + ("CNRI"), and the Individual or Organization ("Licensee") accessing and + otherwise using Python 1.6.1 software in source or binary form and its + associated documentation. + +#. 
Subject to the terms and conditions of this License Agreement, CNRI hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python 1.6.1 alone or in any derivative version, + provided, however, that CNRI's License Agreement and CNRI's notice of copyright, + i.e., "Copyright © 1995-2001 Corporation for National Research Initiatives; All + Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version + prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, + Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 + is made available subject to the terms and conditions in CNRI's License + Agreement. This Agreement together with Python 1.6.1 may be located on the + Internet using the following unique, persistent identifier (known as a handle): + 1895.22/1013. This Agreement may also be obtained from a proxy server on the + Internet using the following URL: http://hdl.handle.net/1895.22/1013." + +#. In the event Licensee prepares a derivative work that is based on or + incorporates Python 1.6.1 or any part thereof, and wants to make the derivative + work available to others as provided herein, then Licensee hereby agrees to + include in any such work a brief summary of the changes made to Python 1.6.1. + +#. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI + MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, + BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY + OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF + PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +#. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +#. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +#. This License Agreement shall be governed by the federal intellectual property + law of the United States, including without limitation the federal copyright + law, and, to the extent such U.S. federal law does not apply, by the law of the + Commonwealth of Virginia, excluding Virginia's conflict of law provisions. + Notwithstanding the foregoing, with regard to derivative works based on Python + 1.6.1 that incorporate non-separable material that was previously distributed + under the GNU General Public License (GPL), the law of the Commonwealth of + Virginia shall govern this License Agreement only as to issues arising under or + with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in + this License Agreement shall be deemed to create any relationship of agency, + partnership, or joint venture between CNRI and Licensee. This License Agreement + does not grant permission to use CNRI trademarks or trade name in a trademark + sense to endorse or promote products or services of Licensee, or any third + party. + +#. By clicking on the "ACCEPT" button where indicated, or by copying, installing + or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and + conditions of this License Agreement. + + +.. centered:: ACCEPT + + +.. 
centered:: CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 + +Copyright © 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The +Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of Stichting Mathematisch Centrum or CWI not be used in advertising or +publicity pertaining to distribution of the software without specific, written +prior permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS +SOFTWARE. + + +Licenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and acknowledgements +for third-party software incorporated in the Python distribution. + + +Mersenne Twister +---------------- + +The :mod:`_random` module includes code based on a download from +http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html. The following are +the verbatim comments from the original code:: + + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. 
+ http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + + +Sockets +------- + +The :mod:`socket` module uses the functions :func:`getaddrinfo` and +:func:`getnameinfo`, which are coded in separate source files from the WIDE +Project, http://www.wide.ad.jp/. :: + + Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the project nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + GAI_ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + FOR GAI_ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON GAI_ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN GAI_ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +Floating point exception control +-------------------------------- + +The source for the :mod:`fpectl` module includes the following notice:: + + --------------------------------------------------------------------- + / Copyright (c) 1996. \ + | The Regents of the University of California. | + | All rights reserved. | + | | + | Permission to use, copy, modify, and distribute this software for | + | any purpose without fee is hereby granted, provided that this en- | + | tire notice is included in all copies of any software which is or | + | includes a copy or modification of this software and in all | + | copies of the supporting documentation for such software. | + | | + | This work was produced at the University of California, Lawrence | + | Livermore National Laboratory under contract no. W-7405-ENG-48 | + | between the U.S. Department of Energy and The Regents of the | + | University of California for the operation of UC LLNL. | + | | + | DISCLAIMER | + | | + | This software was prepared as an account of work sponsored by an | + | agency of the United States Government. Neither the United States | + | Government nor the University of California nor any of their em- | + | ployees, makes any warranty, express or implied, or assumes any | + | liability or responsibility for the accuracy, completeness, or | + | usefulness of any information, apparatus, product, or process | + | disclosed, or represents that its use would not infringe | + | privately-owned rights. 
Reference herein to any specific commer- | + | cial products, process, or service by trade name, trademark, | + | manufacturer, or otherwise, does not necessarily constitute or | + | imply its endorsement, recommendation, or favoring by the United | + | States Government or the University of California. The views and | + | opinions of authors expressed herein do not necessarily state or | + | reflect those of the United States Government or the University | + | of California, and shall not be used for advertising or product | + \ endorsement purposes. / + --------------------------------------------------------------------- + + +MD5 message digest algorithm +---------------------------- + +The source code for the :mod:`md5` module contains the following notice:: + + Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + L. Peter Deutsch + ghost@aladdin.com + + Independent implementation of MD5 (RFC 1321). + + This code implements the MD5 Algorithm defined in RFC 1321, whose + text is available at + http://www.ietf.org/rfc/rfc1321.txt + The code is derived from the text of the RFC, including the test suite + (section A.5) but excluding the rest of Appendix A. It does not include + any code or documentation that is identified in the RFC as being + copyrighted. + + The original and principal author of md5.h is L. Peter Deutsch + <ghost@aladdin.com>. Other authors are noted in the change history + that follows (in reverse chronological order): + + 2002-04-13 lpd Removed support for non-ANSI compilers; removed + references to Ghostscript; clarified derivation from RFC 1321; + now handles byte order either statically or dynamically. + 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. + 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); + added conditionalization for C++ compilation from Martin + Purschke . + 1999-05-03 lpd Original version. + + +Asynchronous socket services +---------------------------- + +The :mod:`asynchat` and :mod:`asyncore` modules contain the following notice:: + + Copyright 1996 by Sam Rushing + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software and + its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of Sam + Rushing not be used in advertising or publicity pertaining to + distribution of the software without specific, written prior + permission. 
+ + SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN + NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR + CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +Cookie management +----------------- + +The :mod:`Cookie` module contains the following notice:: + + Copyright 2000 by Timothy O'Malley + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software + and its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Timothy O'Malley not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR + ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +Execution tracing +----------------- + +The :mod:`trace` module contains the following notice:: + + portions copyright 2001, Autonomous Zones Industries, Inc., all rights... + err... reserved and offered to the public under the terms of the + Python 2.2 license. + Author: Zooko O'Whielacronx + http://zooko.com/ + mailto:zooko@zooko.com + + Copyright 2000, Mojam Media, Inc., all rights reserved. + Author: Skip Montanaro + + Copyright 1999, Bioreason, Inc., all rights reserved. + Author: Andrew Dalke + + Copyright 1995-1997, Automatrix, Inc., all rights reserved. + Author: Skip Montanaro + + Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved. + + + Permission to use, copy, modify, and distribute this Python software and + its associated documentation for any purpose without fee is hereby + granted, provided that the above copyright notice appears in all copies, + and that both that copyright notice and this permission notice appear in + supporting documentation, and that the name of neither Automatrix, + Bioreason or Mojam Media be used in advertising or publicity pertaining to + distribution of the software without specific, written prior permission. + + +UUencode and UUdecode functions +------------------------------- + +The :mod:`uu` module contains the following notice:: + + Copyright 1994 by Lance Ellinghouse + Cathedral City, California Republic, United States of America. + All Rights Reserved + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, + provided that the above copyright notice appear in all copies and that + both that copyright notice and this permission notice appear in + supporting documentation, and that the name of Lance Ellinghouse + not be used in advertising or publicity pertaining to distribution + of the software without specific, written prior permission. 
+ LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO + THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE + FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Modified by Jack Jansen, CWI, July 1995: + - Use binascii module to do the actual line-by-line conversion + between ascii and binary. This results in a 1000-fold speedup. The C + version is still 5 times faster, though. + - Arguments more compliant with Python standard + + +XML Remote Procedure Calls +-------------------------- + +The :mod:`xmlrpclib` module contains the following notice:: + + The XML-RPC client interface is + + Copyright (c) 1999-2002 by Secret Labs AB + Copyright (c) 1999-2002 by Fredrik Lundh + + By obtaining, using, and/or copying this software and/or its + associated documentation, you agree that you have read, understood, + and will comply with the following terms and conditions: + + Permission to use, copy, modify, and distribute this software and + its associated documentation for any purpose and without fee is + hereby granted, provided that the above copyright notice appears in + all copies, and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Secret Labs AB or the author not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD + TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- + ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR + BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY + DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + OF THIS SOFTWARE. + + +test_epoll +---------- + +The :mod:`test_epoll` module contains the following notice:: + + Copyright (c) 2001-2006 Twisted Matrix Laboratories. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +Select kqueue +------------- + +The :mod:`select` module contains the following notice for the kqueue interface:: + + Copyright (c) 2000 Doug White, 2006 James Knight, 2007 Christian Heimes + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +strtod and dtoa +--------------- + +The file :file:`Python/dtoa.c`, which supplies C functions dtoa and +strtod for conversion of C doubles to and from strings, is derived +from the file of the same name by David M. Gay, currently available +from http://www.netlib.org/fp/. The original file, as retrieved on +March 16, 2009, contains the following copyright and licensing +notice:: + + /**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 2000, 2001 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + + +OpenSSL +------- + +The modules :mod:`hashlib`, :mod:`posix`, :mod:`ssl`, :mod:`crypt` use +the OpenSSL library for added performance if made available by the +operating system. Additionally, the Windows installers for Python +include a copy of the OpenSSL libraries, so we include a copy of the +OpenSSL license here:: + + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a dual license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. Actually both licenses are BSD-style + Open Source licenses. In case of any license issues related to OpenSSL + please contact openssl-core@openssl.org. 
+ + OpenSSL License + --------------- + + /* ==================================================================== + * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + + /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. 
+ * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + + +expat +----- + +The :mod:`pyexpat` extension is built using an included copy of the expat +sources unless the build is configured ``--with-system-expat``:: + + Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + and Clark Cooper + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +libffi +------ + +The :mod:`_ctypes` extension is built using an included copy of the libffi +sources unless the build is configured ``--with-system-libffi``:: + + Copyright (c) 1996-2008 Red Hat, Inc and others. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + +zlib +---- + +The :mod:`zlib` extension is built using an included copy of the zlib +sources if the zlib version found on the system is too old to be +used for the build:: + + Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + diff --git a/config/licenses/LICENSE.pytsk b/config/licenses/LICENSE.pytsk new file mode 100644 index 0000000..1d151a1 --- /dev/null +++ b/config/licenses/LICENSE.pytsk @@ -0,0 +1,13 @@ +Copyright 2010 Michael Cohen + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/config/licenses/LICENSE.pytz b/config/licenses/LICENSE.pytz new file mode 100644 index 0000000..5e12fcc --- /dev/null +++ b/config/licenses/LICENSE.pytz @@ -0,0 +1,19 @@ +Copyright (c) 2003-2009 Stuart Bishop + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/config/licenses/LICENSE.pywin32 b/config/licenses/LICENSE.pywin32 new file mode 100644 index 0000000..fa340d7 --- /dev/null +++ b/config/licenses/LICENSE.pywin32 @@ -0,0 +1,30 @@ +Unless stated in the specfic source file, this work is +Copyright (c) 1994-2008, Mark Hammond +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the distribution. + +Neither name of Mark Hammond nor the name of contributors may be used +to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS +IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/config/licenses/LICENSE.six b/config/licenses/LICENSE.six new file mode 100644 index 0000000..d76e024 --- /dev/null +++ b/config/licenses/LICENSE.six @@ -0,0 +1,18 @@ +Copyright (c) 2010-2014 Benjamin Peterson + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/config/licenses/LICENSE.sleuthkit.IBM b/config/licenses/LICENSE.sleuthkit.IBM new file mode 100644 index 0000000..26f289d --- /dev/null +++ b/config/licenses/LICENSE.sleuthkit.IBM @@ -0,0 +1,221 @@ +IBM PUBLIC LICENSE VERSION 1.0 - CORONER TOOLKIT UTILITIES + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS IBM PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE +PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + a) in the case of International Business Machines Corporation ("IBM"), + the Original Program, and + b) in the case of each Contributor, + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate + from and are distributed by that particular Contributor. + A Contribution 'originates' from a Contributor if it was added + to the Program by such Contributor itself or anyone acting on + such Contributor's behalf. + Contributions do not include additions to the Program which: + (i) are separate modules of software distributed in conjunction + with the Program under their own license agreement, and + (ii) are not derivative works of the Program. + +"Contributor" means IBM and any other entity that distributes the Program. + +"Licensed Patents " mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Original Program" means the original version of the software accompanying +this Agreement as released by IBM, including source code, object code +and documentation, if any. + +"Program" means the Original Program and Contributions. + +"Recipient" means anyone who receives the Program under this Agreement, +including all Contributors. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare derivative works of, publicly display, + publicly perform, distribute and sublicense the Contribution of such + Contributor, if any, and such derivative works, in source code and + object code form. 
+ + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in source code and object code form. This patent license + shall apply to the combination of the Contribution and the Program + if, at the time the Contribution is added by the Contributor, such + addition of the Contribution causes such combination to be covered + by the Licensed Patents. The patent license shall not apply to any + other combinations which include the Contribution. No hardware per + se is licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. As a condition to exercising the rights + and licenses granted hereunder, each Recipient hereby assumes sole + responsibility to secure any other intellectual property rights + needed, if any. For example, if a third party patent license + is required to allow Recipient to distribute the Program, it is + Recipient's responsibility to acquire that license before distributing + the Program. + + d) Each Contributor represents that to its knowledge it has sufficient + copyright rights in its Contribution, if any, to grant the copyright + license set forth in this Agreement. + +3. REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form +under its own license agreement, provided that: + a) it complies with the terms and conditions of this Agreement; and + b) its license agreement: + i) effectively disclaims on behalf of all Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; + ii) effectively excludes on behalf of all Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; + iii) states that any provisions which differ from this Agreement + are offered by that Contributor alone and not by any other + party; and + iv) states that source code for the Program is available from + such Contributor, and informs licensees how to obtain it in a + reasonable manner on or through a medium customarily used for + software exchange. + +When the Program is made available in source code form: + a) it must be made available under this Agreement; and + b) a copy of this Agreement must be included with each copy of the + Program. + +Each Contributor must include the following in a conspicuous location +in the Program: + + Copyright (c) 1997,1998,1999, International Business Machines + Corporation and others. All Rights Reserved. + +In addition, each Contributor must identify itself as the originator of +its Contribution, if any, in a manner that reasonably allows subsequent +Recipients to identify the originator of the Contribution. + +4. 
COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, the +Contributor who includes the Program in a commercial product offering +should do so in a manner which does not create potential liability for +other Contributors. Therefore, if a Contributor includes the Program in +a commercial product offering, such Contributor ("Commercial Contributor") +hereby agrees to defend and indemnify every other Contributor +("Indemnified Contributor") against any losses, damages and costs +(collectively "Losses") arising from claims, lawsuits and other legal +actions brought by a third party against the Indemnified Contributor to +the extent caused by the acts or omissions of such Commercial Contributor +in connection with its distribution of the Program in a commercial +product offering. The obligations in this section do not apply to any +claims or Losses relating to any actual or alleged intellectual property +infringement. In order to qualify, an Indemnified Contributor must: + a) promptly notify the Commercial Contributor in writing of such claim, +and + b) allow the Commercial Contributor to control, and cooperate with + the Commercial Contributor in, the defense and any related + settlement negotiations. The Indemnified Contributor may + participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay those +damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED +ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER +EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR +CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A +PARTICULAR PURPOSE. Each Recipient is solely responsible for determining +the appropriateness of using and distributing the Program and assumes +all risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs or +equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR +ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING +WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION +OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further action +by the parties hereto, such provision shall be reformed to the minimum +extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with +respect to a patent applicable to software (including a cross-claim or +counterclaim in a lawsuit), then any patent licenses granted by that +Contributor to such Recipient under this Agreement shall terminate +as of the date such litigation is filed. In addition, If Recipient +institutes patent litigation against any entity (including a cross-claim +or counterclaim in a lawsuit) alleging that the Program itself (excluding +combinations of the Program with other software or hardware) infringes +such Recipient's patent(s), then such Recipient's rights granted under +Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails +to comply with any of the material terms or conditions of this Agreement +and does not cure such failure in a reasonable period of time after +becoming aware of such noncompliance. If all Recipient's rights under +this Agreement terminate, Recipient agrees to cease use and distribution +of the Program as soon as reasonably practicable. However, Recipient's +obligations under this Agreement and any licenses granted by Recipient +relating to the Program shall continue and survive. + +IBM may publish new versions (including revisions) of this Agreement +from time to time. Each new version of the Agreement will be given a +distinguishing version number. The Program (including Contributions) +may always be distributed subject to the version of the Agreement under +which it was received. In addition, after a new version of the Agreement +is published, Contributor may elect to distribute the Program (including +its Contributions) under the new version. No one other than IBM has the +right to modify this Agreement. Except as expressly stated in Sections +2(a) and 2(b) above, Recipient receives no rights or licenses to the +intellectual property of any Contributor under this Agreement, whether +expressly, by implication, estoppel or otherwise. All rights in the +Program not expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and the +intellectual property laws of the United States of America. No party to +this Agreement will bring a legal action under this Agreement more than +one year after the cause of action arose. Each party waives its rights +to a jury trial in any resulting litigation. diff --git a/config/licenses/LICENSE.sleuthkit.cpl1.0 b/config/licenses/LICENSE.sleuthkit.cpl1.0 new file mode 100644 index 0000000..c9990a7 --- /dev/null +++ b/config/licenses/LICENSE.sleuthkit.cpl1.0 @@ -0,0 +1,213 @@ +Common Public License Version 1.0 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM +CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. 
DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial code and +documentation distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + + i) changes to the Program, and + + ii) additions to the Program; + + where such changes and/or additions to the Program originate from and are +distributed by that particular Contributor. A Contribution 'originates' from a +Contributor if it was added to the Program by such Contributor itself or anyone +acting on such Contributor's behalf. Contributions do not include additions to +the Program which: (i) are separate modules of software distributed in +conjunction with the Program under their own license agreement, and (ii) are not +derivative works of the Program. + +"Contributor" means any person or entity that distributes the Program. + +"Licensed Patents " mean patent claims licensable by a Contributor which are +necessarily infringed by the use or sale of its Contribution alone or when +combined with the Program. + +"Program" means the Contributions distributed in accordance with this Agreement. + +"Recipient" means anyone who receives the Program under this Agreement, +including all Contributors. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free copyright license to +reproduce, prepare derivative works of, publicly display, publicly perform, +distribute and sublicense the Contribution of such Contributor, if any, and such +derivative works, in source code and object code form. + + b) Subject to the terms of this Agreement, each Contributor hereby grants +Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed +Patents to make, use, sell, offer to sell, import and otherwise transfer the +Contribution of such Contributor, if any, in source code and object code form. +This patent license shall apply to the combination of the Contribution and the +Program if, at the time the Contribution is added by the Contributor, such +addition of the Contribution causes such combination to be covered by the +Licensed Patents. The patent license shall not apply to any other combinations +which include the Contribution. No hardware per se is licensed hereunder. + + c) Recipient understands that although each Contributor grants the licenses +to its Contributions set forth herein, no assurances are provided by any +Contributor that the Program does not infringe the patent or other intellectual +property rights of any other entity. Each Contributor disclaims any liability to +Recipient for claims brought by any other entity based on infringement of +intellectual property rights or otherwise. As a condition to exercising the +rights and licenses granted hereunder, each Recipient hereby assumes sole +responsibility to secure any other intellectual property rights needed, if any. +For example, if a third party patent license is required to allow Recipient to +distribute the Program, it is Recipient's responsibility to acquire that license +before distributing the Program. + + d) Each Contributor represents that to its knowledge it has sufficient +copyright rights in its Contribution, if any, to grant the copyright license set +forth in this Agreement. + +3. 
REQUIREMENTS + +A Contributor may choose to distribute the Program in object code form under its +own license agreement, provided that: + + a) it complies with the terms and conditions of this Agreement; and + + b) its license agreement: + + i) effectively disclaims on behalf of all Contributors all warranties and +conditions, express and implied, including warranties or conditions of title and +non-infringement, and implied warranties or conditions of merchantability and +fitness for a particular purpose; + + ii) effectively excludes on behalf of all Contributors all liability for +damages, including direct, indirect, special, incidental and consequential +damages, such as lost profits; + + iii) states that any provisions which differ from this Agreement are offered +by that Contributor alone and not by any other party; and + + iv) states that source code for the Program is available from such +Contributor, and informs licensees how to obtain it in a reasonable manner on or +through a medium customarily used for software exchange. + +When the Program is made available in source code form: + + a) it must be made available under this Agreement; and + + b) a copy of this Agreement must be included with each copy of the Program. + +Contributors may not remove or alter any copyright notices contained within the +Program. + +Each Contributor must identify itself as the originator of its Contribution, if +any, in a manner that reasonably allows subsequent Recipients to identify the +originator of the Contribution. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities with +respect to end users, business partners and the like. While this license is +intended to facilitate the commercial use of the Program, the Contributor who +includes the Program in a commercial product offering should do so in a manner +which does not create potential liability for other Contributors. Therefore, if +a Contributor includes the Program in a commercial product offering, such +Contributor ("Commercial Contributor") hereby agrees to defend and indemnify +every other Contributor ("Indemnified Contributor") against any losses, damages +and costs (collectively "Losses") arising from claims, lawsuits and other legal +actions brought by a third party against the Indemnified Contributor to the +extent caused by the acts or omissions of such Commercial Contributor in +connection with its distribution of the Program in a commercial product +offering. The obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. In order +to qualify, an Indemnified Contributor must: a) promptly notify the Commercial +Contributor in writing of such claim, and b) allow the Commercial Contributor to +control, and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may participate in +any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial product +offering, Product X. That Contributor is then a Commercial Contributor. If that +Commercial Contributor then makes performance claims, or offers warranties +related to Product X, those performance claims and warranties are such +Commercial Contributor's responsibility alone. 
Under this section, the +Commercial Contributor would have to defend claims against the other +Contributors related to those performance claims and warranties, and if a court +requires any other Contributor to pay any damages as a result, the Commercial +Contributor must pay those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each +Recipient is solely responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its exercise of +rights under this Agreement, including but not limited to the risks and costs of +program errors, compliance with applicable laws, damage to or loss of data, +programs or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY +CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS +GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under applicable +law, it shall not affect the validity or enforceability of the remainder of the +terms of this Agreement, and without further action by the parties hereto, such +provision shall be reformed to the minimum extent necessary to make such +provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with respect to +a patent applicable to software (including a cross-claim or counterclaim in a +lawsuit), then any patent licenses granted by that Contributor to such Recipient +under this Agreement shall terminate as of the date such litigation is filed. In +addition, if Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the Program +itself (excluding combinations of the Program with other software or hardware) +infringes such Recipient's patent(s), then such Recipient's rights granted under +Section 2(b) shall terminate as of the date such litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it fails to +comply with any of the material terms or conditions of this Agreement and does +not cure such failure in a reasonable period of time after becoming aware of +such noncompliance. If all Recipient's rights under this Agreement terminate, +Recipient agrees to cease use and distribution of the Program as soon as +reasonably practicable. However, Recipient's obligations under this Agreement +and any licenses granted by Recipient relating to the Program shall continue and +survive. + +Everyone is permitted to copy and distribute copies of this Agreement, but in +order to avoid inconsistency the Agreement is copyrighted and may only be +modified in the following manner. The Agreement Steward reserves the right to +publish new versions (including revisions) of this Agreement from time to time. 
+No one other than the Agreement Steward has the right to modify this Agreement. +IBM is the initial Agreement Steward. IBM may assign the responsibility to serve +as the Agreement Steward to a suitable separate entity. Each new version of the +Agreement will be given a distinguishing version number. The Program (including +Contributions) may always be distributed subject to the version of the Agreement +under which it was received. In addition, after a new version of the Agreement +is published, Contributor may elect to distribute the Program (including its +Contributions) under the new version. Except as expressly stated in Sections +2(a) and 2(b) above, Recipient receives no rights or licenses to the +intellectual property of any Contributor under this Agreement, whether +expressly, by implication, estoppel or otherwise. All rights in the Program not +expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and the +intellectual property laws of the United States of America. No party to this +Agreement will bring a legal action under this Agreement more than one year +after the cause of action arose. Each party waives its rights to a jury trial in +any resulting litigation. diff --git a/config/licenses/LICENSE.talloc b/config/licenses/LICENSE.talloc new file mode 100644 index 0000000..3f7b8b1 --- /dev/null +++ b/config/licenses/LICENSE.talloc @@ -0,0 +1,166 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + diff --git a/config/logo.jpg b/config/logo.jpg new file mode 100644 index 0000000..903c849 Binary files /dev/null and b/config/logo.jpg differ diff --git a/config/macosx/Readme.txt b/config/macosx/Readme.txt new file mode 100644 index 0000000..c5daceb --- /dev/null +++ b/config/macosx/Readme.txt @@ -0,0 +1,14 @@ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +| PLASO INSTALLER README +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +This is a simple installer for plaso. + +Simply run the install.sh script as root (sudo ./install.sh) and all shall +be installed and work as it should. + +What the installer does is to install all dependencies to plaso as well as +plaso and dfvfs as separate packages. + +More documentation: http://plaso.kiddaland.net + +Questions/comments/thoughts? send them to log2timeline-discuss@googlegroups.com diff --git a/config/macosx/install.sh.in b/config/macosx/install.sh.in new file mode 100755 index 0000000..af7e17b --- /dev/null +++ b/config/macosx/install.sh.in @@ -0,0 +1,56 @@ +#!/bin/bash +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This is a simple installer script for the Mac OS X platform. + +EXIT_SUCCESS=0; +EXIT_FAILURE=1; + +echo "===============================================================" +echo " PLASO INSTALLER" +echo "===============================================================" + +if test "$USER" != "root"; +then + echo "This script requires root privileges. Running: sudo."; + sudo ls > /dev/null + + if test $? -ne 0; + then + echo "Do you have root privileges?"; + + exit ${EXIT_FAILURE}; + fi +fi + +VOLUME_NAME="/Volumes/@VOLUMENAME@"; + +if ! test -d ${VOLUME_NAME}; +then + echo "Unable to find installation directory: ${VOLUME_NAME}"; + + exit ${EXIT_FAILURE}; +fi + +echo "Installing packages."; + +find ${VOLUME_NAME} -name "*.pkg" -exec sudo installer -target / -pkg {} \; + +echo "Done."; + +exit ${EXIT_SUCCESS}; + diff --git a/config/macosx/make_dist.sh b/config/macosx/make_dist.sh new file mode 100755 index 0000000..16842f5 --- /dev/null +++ b/config/macosx/make_dist.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Script to make a plaso Mac OS X distribution package. + +EXIT_SUCCESS=0; +EXIT_FAILURE=1; + +if ! test -d dependencies; +then + echo "Missing dependencies directory."; + + exit ${EXIT_FAILURE}; +fi + +if ! test -d config; +then + echo "Missing config directory."; + + exit ${EXIT_FAILURE}; +fi + +MACOSX_VERSION=`sw_vers -productVersion | awk -F '.' '{print $1 "." $2}'`; +PLASO_VERSION=`grep -e '^__version' plaso/__init__.py | sed -e "s/^[^=]*= '\([^']*\)'/\1/g"`; + +if ! test -z $1; +then + PLASO_VERSION="${PLASO_VERSION}-$1"; +fi + +if ! 
test -f ../python-plaso-${PLASO_VERSION}.pkg; +then + echo "Missing plaso package: ../python-plaso-${PLASO_VERSION}.pkg file."; + + exit ${EXIT_FAILURE}; +fi + +DISTDIR="plaso-${PLASO_VERSION}"; + +if test -d "${DISTDIR}"; +then + echo "Distribution directory: ${DISTDIR} already exists."; + + exit ${EXIT_FAILURE}; +fi + +mkdir "${DISTDIR}"; +cp -r config/licenses "${DISTDIR}"; +cp config/macosx/Readme.txt "${DISTDIR}"; + +sed "s/@VOLUMENAME@/${DISTDIR}/" config/macosx/install.sh.in > "${DISTDIR}/install.sh"; + +mkdir "${DISTDIR}/packages"; +cp dependencies/*.pkg "${DISTDIR}/packages"; +cp ../python-plaso-${PLASO_VERSION}.pkg "${DISTDIR}/packages"; + +hdiutil create ../plaso-${PLASO_VERSION}_macosx-${MACOSX_VERSION}.dmg -srcfolder "${DISTDIR}/" -fs HFS+; + +exit ${EXIT_SUCCESS}; + diff --git a/config/windows/make.bat b/config/windows/make.bat new file mode 100644 index 0000000..46f33db --- /dev/null +++ b/config/windows/make.bat @@ -0,0 +1,16 @@ +@echo off +del /q /s build dist 2> NUL +rmdir /q /s build dist 2> NUL + +set PYTHONPATH=. + +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\image_export.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\log2timeline.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\pinfo.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\plasm.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\pprof.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\preg.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\pshell.py +C:\Python27\python.exe ..\pyinstaller\pyinstaller.py --onedir plaso\frontend\psort.py + +set PYTHONPATH= diff --git a/config/windows/make_check.bat b/config/windows/make_check.bat new file mode 100644 index 0000000..77d893b --- /dev/null +++ b/config/windows/make_check.bat @@ -0,0 +1,12 @@ +@echo off +@rem Script to make sure the executables run after make_dist.bat. + +dist\plaso\image_export.exe -h +dist\plaso\log2timeline.exe --info +dist\plaso\pinfo.exe -v test_data\psort_test.out +dist\plaso\plasm.exe -h +dist\plaso\pprof.exe +dist\plaso\preg.exe -h +dist\plaso\psort.exe +dist\plaso\pshell.exe + diff --git a/config/windows/make_dist.bat b/config/windows/make_dist.bat new file mode 100644 index 0000000..8866c23 --- /dev/null +++ b/config/windows/make_dist.bat @@ -0,0 +1,22 @@ +@echo off +del /q /s dist\plaso 2> NUL + +rmdir /q /s dist\plaso 2> NUL + +mkdir dist\plaso +mkdir dist\plaso\licenses + +xcopy /q /y ACKNOWLEDGEMENTS dist\plaso +xcopy /q /y AUTHORS dist\plaso +xcopy /q /y LICENSE dist\plaso +xcopy /q /y README dist\plaso +xcopy /q /y config\licenses\* dist\plaso\licenses + +xcopy /q /y /s dist\image_export\* dist\plaso +xcopy /q /y /s dist\log2timeline\* dist\plaso +xcopy /q /y /s dist\pinfo\* dist\plaso +xcopy /q /y /s dist\plasm\* dist\plaso +xcopy /q /y /s dist\pprof\* dist\plaso +xcopy /q /y /s dist\preg\* dist\plaso +xcopy /q /y /s dist\pshell\* dist\plaso +xcopy /q /y /s dist\psort\* dist\plaso diff --git a/extra/README b/extra/README new file mode 100644 index 0000000..e0932c1 --- /dev/null +++ b/extra/README @@ -0,0 +1 @@ +This folder will contain additional files that contain filter criteria, tagging files, etc. 
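+
+For reference, a tagging file here uses a simple two-level layout: an
+unindented tag name on one line, followed by one or more indented event
+filter expressions that select the events to receive that tag. A minimal
+sketch of an entry (this example mirrors the first rule in tag_windows.txt):
+
+Application Execution
+  data_type is 'windows:prefetch'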
diff --git a/extra/plaso_kibana_example.json b/extra/plaso_kibana_example.json new file mode 100644 index 0000000..ad1f8a9 --- /dev/null +++ b/extra/plaso_kibana_example.json @@ -0,0 +1,329 @@ +{ + "title": "Plaso", + "services": { + "query": { + "idQueue": [ + 1, + 2, + 3, + 4 + ], + "list": { + "0": { + "query": "*", + "alias": "", + "color": "#7EB26D", + "id": 0, + "pin": false, + "type": "lucene" + } + }, + "ids": [ + 0 + ] + }, + "filter": { + "idQueue": [ + 0, + 1, + 2 + ], + "list": {}, + "ids": [] + } + }, + "rows": [ + { + "title": "Histogram", + "height": "200px", + "editable": true, + "collapse": false, + "collapsable": true, + "panels": [ + { + "span": 12, + "editable": true, + "type": "histogram", + "loadingEditor": false, + "mode": "count", + "time_field": "datetime", + "queries": { + "mode": "all", + "ids": [ + 0 + ] + }, + "value_field": null, + "auto_int": true, + "resolution": 100, + "interval": "1y", + "intervals": [ + "auto", + "1s", + "1m", + "5m", + "10m", + "30m", + "1h", + "3h", + "12h", + "1d", + "1w", + "1M", + "1y" + ], + "fill": 0, + "linewidth": 3, + "timezone": "browser", + "spyable": true, + "zoomlinks": true, + "bars": true, + "stack": true, + "points": false, + "lines": false, + "legend": true, + "x-axis": true, + "y-axis": true, + "percentage": false, + "interactive": true, + "options": true, + "tooltip": { + "value_type": "cumulative", + "query_as_alias": false + }, + "title": "Histogram" + } + ], + "notice": false + }, + { + "title": "Graph", + "height": "250px", + "editable": true, + "collapse": false, + "collapsable": true, + "panels": [ + { + "error": false, + "span": 4, + "editable": true, + "type": "terms", + "loadingEditor": false, + "queries": { + "mode": "selected", + "ids": [ + 0 + ] + }, + "field": "source_short", + "exclude": [], + "missing": true, + "other": true, + "size": 10, + "order": "count", + "style": { + "font-size": "10pt" + }, + "donut": false, + "tilt": false, + "labels": true, + "arrangement": "horizontal", + "chart": "bar", + "counter_pos": "below", + "spyable": true, + "title": "Source Distribution" + }, + { + "error": false, + "span": 4, + "editable": true, + "type": "terms", + "loadingEditor": false, + "queries": { + "mode": "selected", + "ids": [] + }, + "field": "parser", + "exclude": [], + "missing": true, + "other": true, + "size": 10, + "order": "count", + "style": { + "font-size": "10pt" + }, + "donut": false, + "tilt": false, + "labels": true, + "arrangement": "horizontal", + "chart": "table", + "counter_pos": "above", + "spyable": true, + "title": "Parser Count" + }, + { + "error": false, + "span": 4, + "editable": true, + "type": "terms", + "loadingEditor": false, + "queries": { + "mode": "selected", + "ids": [] + }, + "field": "hostname", + "exclude": [], + "missing": true, + "other": true, + "size": 10, + "order": "count", + "style": { + "font-size": "10pt" + }, + "donut": false, + "tilt": false, + "labels": true, + "arrangement": "horizontal", + "chart": "bar", + "counter_pos": "above", + "spyable": true, + "title": "Hosts" + } + ], + "notice": false + }, + { + "title": "Events", + "height": "650px", + "editable": true, + "collapse": false, + "collapsable": true, + "panels": [ + { + "error": false, + "span": 12, + "editable": true, + "group": [ + "default" + ], + "type": "table", + "size": 100, + "pages": 5, + "offset": 0, + "sort": [ + "datetime", + "desc" + ], + "style": { + "font-size": "9pt" + }, + "overflow": "min-height", + "fields": [ + "datetime", + "timestamp_desc", + "hostname", + "username", + 
"source_short", + "source_long", + "message", + "tag", + "display_name" + ], + "highlight": [], + "sortable": true, + "header": true, + "paging": true, + "spyable": true, + "queries": { + "mode": "all", + "ids": [ + 0 + ] + }, + "field_list": true, + "status": "Stable", + "trimFactor": 300, + "normTimes": true, + "title": "Documents", + "all_fields": false + } + ], + "notice": false + } + ], + "editable": true, + "index": { + "interval": "none", + "pattern": "[logstash-]YYYY.MM.DD", + "default": "_all" + }, + "style": "light", + "failover": false, + "panel_hints": true, + "loader": { + "save_gist": false, + "save_elasticsearch": true, + "save_local": true, + "save_default": true, + "save_temp": true, + "save_temp_ttl_enable": true, + "save_temp_ttl": "30d", + "load_gist": true, + "load_elasticsearch": true, + "load_elasticsearch_size": 20, + "load_local": true, + "hide": false + }, + "pulldowns": [ + { + "type": "query", + "collapse": false, + "notice": false, + "query": "*", + "pinned": true, + "history": [], + "remember": 10, + "enable": true + }, + { + "type": "filtering", + "collapse": true, + "notice": false, + "enable": true + } + ], + "nav": [ + { + "type": "timepicker", + "collapse": false, + "notice": false, + "status": "Stable", + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "timefield": "@timestamp", + "enable": true + } + ], + "refresh": false +} \ No newline at end of file diff --git a/extra/tag_macosx.txt b/extra/tag_macosx.txt new file mode 100755 index 0000000..49dda0b --- /dev/null +++ b/extra/tag_macosx.txt @@ -0,0 +1,21 @@ +Application Execution + data_type is 'macosx:application_usage' + data_type is 'syslog:line' AND body contains 'COMMAND=/bin/launchctl' + +Application Install + data_type is 'plist:key' AND plugin is 'plist_install_history' + +AutoRun + data_type is 'fs:stat' AND filename contains 'LaunchAgents/' AND timestamp_desc is 'HFS_DETECT crtime' AND filename contains '.plist' + +File Downloaded + data_type is 'chrome:history:file_downloaded' + timestamp_desc is 'File Downloaded' + data_type is 'macosx:lsquarantine' + +Device Connected + data_type is 'ipod:device:entry' + data_type is 'plist:key' and plugin is 'plist_airport' + +Document Printed + (data_type is 'metadata:hachoir' OR data_type is 'metadata:OLECF') AND timestamp_desc contains 'Printed' diff --git a/extra/tag_windows.txt b/extra/tag_windows.txt new file mode 100755 index 0000000..8820591 --- /dev/null +++ b/extra/tag_windows.txt @@ -0,0 +1,94 @@ +Application Execution + data_type is 'windows:prefetch' + data_type is 'windows:lnk:link' and filename contains 'Recent' and (local_path contains '.exe' or network_path contains '.exe' or relative_path contains '.exe') + data_type is 'windows:registry:key_value' AND (plugin contains 'userassist' or plugin contains 'mru') AND regvalue.__all__ contains '.exe' + data_type is 'windows:evtx:record' and strings contains 'user mode service' and strings contains 'demand start' + data_type is 'fs:stat' and filename contains 'Windows/Tasks/At' + data_type is 'windows:tasks:job' + data_type is 'windows:evt:record' and source_name is 'Security' and event_identifier is 592 + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Security-Auditing' and event_identifier is 4688 + data_type is 'windows:registry:appcompatcache' + +Application Installed + data_type is 'windows:evtx:record' 
and source_name is 'Microsoft-Windows-Application-Experience' and event_identifier is 903
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Application-Experience' and event_identifier is 904
+
+Application Updated
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Application-Experience' and event_identifier is 905
+
+Application Removed
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Application-Experience' and event_identifier is 907
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Application-Experience' and event_identifier is 908
+
+Document Opened
+  data_type is 'windows:registry:key_value' AND plugin contains 'mru' AND regvalue.__all__ not contains '.exe' AND timestamp > 0
+
+Failed Login
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Security-Auditing' and event_identifier is 4625
+
+Logon
+  data_type is 'windows:evt:record' and source_name is 'Security' and event_identifier is 540
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Security-Auditing' and event_identifier is 4624
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 21
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 1101
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Winlogon' and event_identifier is 7001
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-RemoteConnectionManager' and event_identifier is 1147
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-RemoteConnectionManager' and event_identifier is 1149
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-User Profiles Service' and event_identifier is 2
+
+Logoff
+  data_type is 'windows:evt:record' and source_name is 'Security' and event_identifier is 538
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Security-Auditing' and event_identifier is 4634
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Winlogon' and event_identifier is 7002
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 23
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 1103
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-User Profiles Service' and event_identifier is 4
+
+Disconnect
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 24
+
+Reconnect
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 25
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 1105
+
+Shell Start
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 22
+  data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TerminalServices-LocalSessionManager' and event_identifier is 1102
+
+Task Scheduled
+  data_type is 'windows:evt:record' and source_name is 'Security' and
event_identifier is 602 + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Security-Auditing' and event_identifier is 4698 + +Job Success + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TaskScheduler' and event_identifier is 102 + +Action Success + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-TaskScheduler' and event_identifier is 201 + +Name Resolution Timeout + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-DNS-Client' and event_identifier is 1014 + +Time Change + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Kernel-General' and event_identifier is 1 + +Shutdown + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Kernel-General' and event_identifier is 13 + +System Start + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Kernel-General' and event_identifier is 13 + +System Sleep + data_type is 'windows:evtx:record' and source_name is 'Microsoft-Windows-Kernel-Power' and event_identifier is 42 + +AutoRun + data_type is 'windows:registry:key_value' and plugin contains 'Run' + +File Downloaded + data_type is 'chrome:history:file_downloaded' + timestamp_desc is 'File Downloaded' + +Document Printed + (data_type is 'metadata:hachoir' OR data_type is 'olecf:summary_info') AND timestamp_desc contains 'Printed' + +Startup Application + data_type is 'windows:registry:key_value' AND (plugin contains 'run' or plugin contains 'lfu') AND (regvalue.__all__ contains '.exe' OR regvalue.__all__ contains '.dll') diff --git a/plaso/__init__.py b/plaso/__init__.py new file mode 100644 index 0000000..93bd326 --- /dev/null +++ b/plaso/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = '1.2.0' + +VERSION_DEV = False +VERSION_DATE = '20141220' + + +def GetVersion(): + """Returns version information for plaso.""" + if not VERSION_DEV: + return __version__ + + return u'{0:s}_{1:s}'.format(__version__, VERSION_DATE) diff --git a/plaso/analysis/__init__.py b/plaso/analysis/__init__.py new file mode 100644 index 0000000..a4e6c8e --- /dev/null +++ b/plaso/analysis/__init__.py @@ -0,0 +1,83 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Import statements for analysis plugins and common methods."""
+
+from plaso.analysis import interface
+from plaso.lib import errors
+
+# Import statements of analysis plugins.
+from plaso.analysis import browser_search
+from plaso.analysis import chrome_extension
+from plaso.analysis import windows_services
+
+
+# TODO: move these functions to a manager class. And add a test for this
+# function.
+def ListAllPluginNames(show_all=True):
+  """Return a list of all available plugin names and their doc strings."""
+  results = []
+  for cls_obj in interface.AnalysisPlugin.classes.itervalues():
+    doc_string, _, _ = cls_obj.__doc__.partition('\n')
+
+    obj = cls_obj(None)
+    if not show_all and cls_obj.ENABLE_IN_EXTRACTION:
+      results.append((obj.plugin_name, doc_string, obj.plugin_type))
+    elif show_all:
+      results.append((obj.plugin_name, doc_string, obj.plugin_type))
+
+  return sorted(results)
+
+
+def LoadPlugins(plugin_names, incoming_queues, options=None):
+  """Yield analysis plugins for a given list of plugin names.
+
+  Given a list of plugin names this method finds the analysis
+  plugins, initializes them and returns a generator.
+
+  Args:
+    plugin_names: A list of plugin names that should be loaded up. This
+                  should be a list of strings.
+    incoming_queues: A list of queues (QueueInterface object) that the plugin
+                     uses to read in incoming events to analyse.
+    options: Optional command line arguments (instance of
+        argparse.Namespace). The default is None.
+
+  Yields:
+    Analysis plugin objects (instances of AnalysisPlugin).
+
+  Raises:
+    errors.BadConfigOption: If plugin_names does not contain a list of
+                            strings.
+  """
+  try:
+    plugin_names_lower = [word.lower() for word in plugin_names]
+  except AttributeError:
+    raise errors.BadConfigOption(u'Plugin names should be a list of strings.')
+
+  for plugin_object in interface.AnalysisPlugin.classes.itervalues():
+    plugin_name = plugin_object.NAME.lower()
+
+    if plugin_name in plugin_names_lower:
+      queue_index = plugin_names_lower.index(plugin_name)
+
+      try:
+        incoming_queue = incoming_queues[queue_index]
+      except (TypeError, IndexError):
+        incoming_queue = None
+
+      yield plugin_object(incoming_queue, options) diff --git a/plaso/analysis/browser_search.py b/plaso/analysis/browser_search.py new file mode 100644 index 0000000..e9c88db --- /dev/null +++ b/plaso/analysis/browser_search.py @@ -0,0 +1,257 @@ +#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
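+
+# The FILTERS table below pairs an event filter expression with the name of a
+# FilterClass classmethod; ExamineEvent matches WEBHIST events against each
+# filter and hands the event's url attribute to the paired callback to pull
+# out the search term. For example, given the URL
+# 'http://www.google.com/search?q=funny+cats', GoogleSearch() returns
+# 'funny cats'.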
+"""A plugin that extracts browser search terms from events."""
+
+import collections
+import logging
+import urllib
+
+from plaso import filters
+from plaso.analysis import interface
+from plaso.formatters import manager as formatters_manager
+from plaso.lib import event
+
+
+# Create a lightweight object that is used to store timeline based information
+# about each search term.
+SEARCH_OBJECT = collections.namedtuple(
+    'SEARCH_OBJECT', 'time source engine search_term')
+
+
+def ScrubLine(line):
+  """Scrub the line of the most obvious URL-encoded (%XX) sequences.
+
+  An attempt at taking a line and swapping all instances of %XX, which
+  represent a character in hex, with its Unicode character.
+
+  Args:
+    line: The string that we are about to "fix".
+
+  Returns:
+    String that has its %XX hex codes swapped for text.
+  """
+  if not line:
+    return ''
+
+  try:
+    return unicode(urllib.unquote(str(line)), 'utf-8')
+  except UnicodeDecodeError:
+    logging.warning(u'Unable to decode line: {0:s}'.format(line))
+
+  return line
+
+
+class FilterClass(object):
+  """A class that contains all the parser functions."""
+
+  @classmethod
+  def _GetBetweenQEqualsAndAmbersand(cls, string):
+    """Return the substring found between 'q=' and '&'."""
+    if 'q=' not in string:
+      return string
+    _, _, line = string.partition('q=')
+    before_and, _, _ = line.partition('&')
+    if not before_and:
+      return line
+    return before_and.split()[0]
+
+  @classmethod
+  def _SearchAndQInLine(cls, string):
+    """Return a bool indicating if the words q= and search appear in string."""
+    return 'search' in string and 'q=' in string
+
+  @classmethod
+  def GoogleSearch(cls, url):
+    """Return the extracted string."""
+    if not cls._SearchAndQInLine(url):
+      return
+
+    line = cls._GetBetweenQEqualsAndAmbersand(url)
+    if not line:
+      return
+
+    return line.replace('+', ' ')
+
+  @classmethod
+  def YouTube(cls, url):
+    """Return the extracted string."""
+    return cls.GenericSearch(url)
+
+  @classmethod
+  def BingSearch(cls, url):
+    """Return the extracted string."""
+    return cls.GenericSearch(url)
+
+  @classmethod
+  def GenericSearch(cls, url):
+    """Return the extracted string from a generic search engine."""
+    if not cls._SearchAndQInLine(url):
+      return
+
+    return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
+
+  @classmethod
+  def Yandex(cls, url):
+    """Return the results from the Yandex search engine."""
+    if 'text=' not in url:
+      return
+    _, _, line = url.partition('text=')
+    before_and, _, _ = line.partition('&')
+    if not before_and:
+      return
+    yandex_search_url = before_and.split()[0]
+
+    return yandex_search_url.replace('+', ' ')
+
+  @classmethod
+  def DuckDuckGo(cls, url):
+    """Return the extracted string."""
+    if 'q=' not in url:
+      return
+    return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ')
+
+  @classmethod
+  def Gmail(cls, url):
+    """Return the extracted string."""
+    if 'search/' not in url:
+      return
+
+    _, _, line = url.partition('search/')
+    first, _, _ = line.partition('/')
+    second, _, _ = first.partition('?compose')
+
+    return second.replace('+', ' ')
+
+
+class AnalyzeBrowserSearchPlugin(interface.AnalysisPlugin):
+  """Analyze browser search entries from events."""
+
+  NAME = 'browser_search'
+
+  # Indicate that we do not want to run this plugin during regular extraction.
+  ENABLE_IN_EXTRACTION = False
+
+  # Here we define filters and callback methods for all hits on each filter.
+  FILTERS = (
+      (('url iregexp "(www.|encrypted.|/)google."
and url contains "search"'), + 'GoogleSearch'), + ('url contains "youtube.com"', 'YouTube'), + (('source is "WEBHIST" and url contains "bing.com" and url contains ' + '"search"'), 'BingSearch'), + ('url contains "mail.google.com"', 'Gmail'), + (('source is "WEBHIST" and url contains "yandex.com" and url contains ' + '"yandsearch"'), 'Yandex'), + ('url contains "duckduckgo.com"', 'DuckDuckGo') + ) + + # We need to implement the interface for analysis plugins, but we don't use + # command line options here, so disable checking for unused args. + # pylint: disable=unused-argument + def __init__(self, incoming_queue, options=None): + """Initializes the browser search analysis plugin. + + Args: + incoming_queue: A queue that is used to listen to incoming events. + options: Optional command line arguments (instance of + argparse.Namespace). The default is None. + """ + super(AnalyzeBrowserSearchPlugin, self).__init__(incoming_queue) + self._filter_dict = {} + self._counter = collections.Counter() + + # Store a list of search terms in a timeline format. + # The format is key = timestamp, value = (source, engine, search term). + self._search_term_timeline = [] + + for filter_str, call_back in self.FILTERS: + filter_obj = filters.GetFilter(filter_str) + call_back_obj = getattr(FilterClass, call_back, None) + if filter_obj and call_back_obj: + self._filter_dict[filter_obj] = (call_back, call_back_obj) + + # pylint: enable=unused-argument + + def CompileReport(self): + """Compiles a report of the analysis. + + Returns: + The analysis report (instance of AnalysisReport). + """ + report = event.AnalysisReport() + + results = {} + for key, count in self._counter.iteritems(): + search_engine, _, search_term = key.partition(':') + results.setdefault(search_engine, {}) + results[search_engine][search_term] = count + report.report_dict = results + report.report_array = self._search_term_timeline + + lines_of_text = [] + for search_engine, terms in sorted(results.items()): + lines_of_text.append(u' == ENGINE: {0:s} =='.format(search_engine)) + + for search_term, count in sorted( + terms.iteritems(), key=lambda x: (x[1], x[0]), reverse=True): + lines_of_text.append(u'{0:d} {1:s}'.format(count, search_term)) + + # An empty string is added to have SetText create an empty line. + lines_of_text.append(u'') + + report.SetText(lines_of_text) + + return report + + def ExamineEvent( + self, unused_analysis_context, event_object, **unused_kwargs): + """Analyzes an event object. + + Args: + analysis_context: An analysis context object + (instance of AnalysisContext). + event_object: An event object (instance of EventObject). + """ + # This event requires an URL attribute. + url_attribute = getattr(event_object, 'url', None) + + if not url_attribute: + return + + # TODO: refactor this the source should be used in formatting only. + # Check if we are dealing with a web history event. + source, _ = formatters_manager.EventFormatterManager.GetSourceStrings( + event_object) + + if source != 'WEBHIST': + return + + for filter_obj, call_backs in self._filter_dict.items(): + call_back_name, call_back_object = call_backs + if filter_obj.Match(event_object): + returned_line = ScrubLine(call_back_object(url_attribute)) + if not returned_line: + continue + self._counter[u'{0:s}:{1:s}'.format(call_back_name, returned_line)] += 1 + + # Add the timeline format for each search term. 
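+        # Note: SEARCH_OBJECT is the module-level namedtuple with the fields
+        # (time, source, engine, search_term); the source value falls back
+        # from the event's plugin attribute to its parser name, or u'N/A'.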
+ self._search_term_timeline.append(SEARCH_OBJECT( + getattr(event_object, 'timestamp', 0), + getattr(event_object, 'plugin', getattr( + event_object, 'parser', u'N/A')), + call_back_name, returned_line)) diff --git a/plaso/analysis/browser_search_test.py b/plaso/analysis/browser_search_test.py new file mode 100644 index 0000000..0c013ac --- /dev/null +++ b/plaso/analysis/browser_search_test.py @@ -0,0 +1,74 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the browser search analysis plugin.""" + +import unittest + +from plaso.analysis import browser_search +from plaso.analysis import test_lib +# pylint: disable=unused-import +from plaso.formatters import chrome as chrome_formatter +from plaso.lib import event +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import chrome + + +class BrowserSearchAnalysisTest(test_lib.AnalysisPluginTestCase): + """Tests for the browser search analysis plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = sqlite.SQLiteParser() + + def testAnalyzeFile(self): + """Read a storage file that contains URL data and analyze it.""" + knowledge_base = self._SetUpKnowledgeBase() + + test_file = self._GetTestFilePath(['History']) + event_queue = self._ParseFile(self._parser, test_file, knowledge_base) + + analysis_plugin = browser_search.AnalyzeBrowserSearchPlugin(event_queue) + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + self.assertEquals(len(analysis_reports), 1) + + analysis_report = analysis_reports[0] + + # Due to the behavior of the join one additional empty string at the end + # is needed to create the last empty line. + expected_text = u'\n'.join([ + u' == ENGINE: GoogleSearch ==', + u'1 really really funny cats', + u'1 java plugin', + u'1 funnycats.exe', + u'1 funny cats', + u'', + u'']) + + self.assertEquals(analysis_report.text, expected_text) + self.assertEquals(analysis_report.plugin_name, 'browser_search') + + expected_keys = set([u'GoogleSearch']) + self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/analysis/chrome_extension.py b/plaso/analysis/chrome_extension.py new file mode 100644 index 0000000..43da663 --- /dev/null +++ b/plaso/analysis/chrome_extension.py @@ -0,0 +1,201 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A plugin that gathers extension IDs from Chrome browser history."""
+
+import logging
+import re
+import urllib2
+
+from plaso.analysis import interface
+from plaso.lib import event
+
+
+class AnalyzeChromeExtensionPlugin(interface.AnalysisPlugin):
+  """Convert Chrome extension IDs into names; requires Internet connection."""
+
+  NAME = 'chrome_extension'
+
+  # Indicate that we can run this plugin during regular extraction.
+  ENABLE_IN_EXTRACTION = True
+
+  _TITLE_RE = re.compile('<title>([^<]+)</title>')
+  _WEB_STORE_URL = u'https://chrome.google.com/webstore/detail/{xid}?hl=en-US'
+
+  # We need to implement the interface for analysis plugins, but we don't use
+  # command line options here, so disable checking for unused args.
+  # pylint: disable=unused-argument
+  def __init__(self, incoming_queue, options=None):
+    """Initializes the Chrome extension analysis plugin.
+
+    Args:
+      incoming_queue: A queue that is used to listen to incoming events.
+      options: Optional command line arguments (instance of
+          argparse.Namespace). The default is None.
+    """
+    super(AnalyzeChromeExtensionPlugin, self).__init__(incoming_queue)
+
+    self._results = {}
+    self.plugin_type = self.TYPE_REPORT
+
+    # TODO: see if these can be moved to arguments passed to ExamineEvent
+    # or some kind of state object.
+    self._sep = None
+    self._user_paths = None
+
+    # Saved list of already looked up extensions.
+    self._extensions = {}
+
+  # pylint: enable=unused-argument
+
+  def _GetChromeWebStorePage(self, extension_id):
+    """Retrieves the page for the extension from the Chrome store website.
+
+    Args:
+      extension_id: string containing the extension identifier.
+    """
+    web_store_url = self._WEB_STORE_URL.format(xid=extension_id)
+    try:
+      response = urllib2.urlopen(web_store_url)
+
+    except urllib2.HTTPError as exception:
+      logging.warning((
+          u'[{0:s}] unable to retrieve URL: {1:s} with error: {2:s}').format(
+              self.NAME, web_store_url, exception))
+      return
+
+    except urllib2.URLError as exception:
+      logging.warning((
+          u'[{0:s}] invalid URL: {1:s} with error: {2:s}').format(
+              self.NAME, web_store_url, exception))
+      return
+
+    return response
+
+  def _GetTitleFromChromeWebStore(self, extension_id):
+    """Retrieves the name of the extension from the Chrome store website.
+
+    Args:
+      extension_id: string containing the extension identifier.
+    """
+    # Check if we have already looked this extension up.
+    if extension_id in self._extensions:
+      return self._extensions.get(extension_id)
+
+    response = self._GetChromeWebStorePage(extension_id)
+    if not response:
+      logging.warning(
+          u'[{0:s}] no data returned for extension identifier: {1:s}'.format(
+              self.NAME, extension_id))
+      return
+
+    first_line = response.readline()
+    match = self._TITLE_RE.search(first_line)
+    if match:
+      title = match.group(1)
+      if title.startswith(u'Chrome Web Store - '):
+        name = title[19:]
+      elif title.endswith(u'- Chrome Web Store'):
+        name = title[:-19]
+      else:
+        # Fall back to the full page title so that name is always defined.
+        name = title
+
+      self._extensions[extension_id] = name
+      return name
+
+    self._extensions[extension_id] = u'Not Found'
+
+  def CompileReport(self):
+    """Compiles a report of the analysis.
+ + Returns: + The analysis report (instance of AnalysisReport). + """ + report = event.AnalysisReport() + + report.report_dict = self._results + + lines_of_text = [] + for user, extensions in sorted(self._results.iteritems()): + lines_of_text.append(u' == USER: {0:s} =='.format(user)) + for extension, extension_id in sorted(extensions): + lines_of_text.append(u' {0:s} [{1:s}]'.format(extension, extension_id)) + + # An empty string is added to have SetText create an empty line. + lines_of_text.append(u'') + + report.SetText(lines_of_text) + + return report + + def ExamineEvent(self, analysis_context, event_object, **unused_kwargs): + """Analyzes an event object. + + Args: + analysis_context: An analysis context object + (instance of AnalysisContext). + event_object: An event object (instance of EventObject). + """ + # Only interested in filesystem events. + if event_object.data_type != 'fs:stat': + return + + filename = getattr(event_object, 'filename', None) + if not filename: + return + + # Determine if we have a Chrome extension ID. + if u'chrome' not in filename.lower(): + return + + if not self._sep: + self._sep = analysis_context.GetPathSegmentSeparator(filename) + + if not self._user_paths: + self._user_paths = analysis_context.GetUserPaths(analysis_context.users) + + if u'{0:s}Extensions{0:s}'.format(self._sep) not in filename: + return + + # Now we have extension ID's, let's check if we've got the + # folder, nothing else. + paths = filename.split(self._sep) + if paths[-2] != u'Extensions': + return + + extension_id = paths[-1] + if extension_id == u'Temp': + return + + # Get the user and ID. + user = analysis_context.GetUsernameFromPath( + self._user_paths, filename, self._sep) + + # We still want this information in here, so that we can + # manually deduce the username. + if not user: + if len(filename) > 25: + user = u'Not found ({0:s}...)'.format(filename[0:25]) + else: + user = u'Not found ({0:s})'.format(filename) + + extension = self._GetTitleFromChromeWebStore(extension_id) + if not extension: + extension = extension_id + + self._results.setdefault(user, []) + extension_string = extension.decode('utf-8', 'ignore') + if (extension_string, extension_id) not in self._results[user]: + self._results[user].append((extension_string, extension_id)) diff --git a/plaso/analysis/chrome_extension_test.py b/plaso/analysis/chrome_extension_test.py new file mode 100644 index 0000000..baff358 --- /dev/null +++ b/plaso/analysis/chrome_extension_test.py @@ -0,0 +1,196 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the chrome extension analysis plugin.""" + +import os +import unittest + +from plaso.analysis import chrome_extension +from plaso.analysis import test_lib +from plaso.engine import queue +from plaso.engine import single_process +from plaso.lib import event + +# We are accessing quite a lot of protected members in this test file. +# Suppressing that message test file wide. +# pylint: disable=protected-access + + +class AnalyzeChromeExtensionTestPlugin( + chrome_extension.AnalyzeChromeExtensionPlugin): + """Chrome extension analysis plugin used for testing.""" + + NAME = 'chrome_extension_test' + + _TEST_DATA_PATH = os.path.join( + os.getcwd(), u'test_data', u'chrome_extensions') + + def _GetChromeWebStorePage(self, extension_id): + """Retrieves the page for the extension from the Chrome store test data. + + Args: + extension_id: string containing the extension identifier. + """ + chrome_web_store_file = os.path.join(self._TEST_DATA_PATH, extension_id) + if not os.path.exists(chrome_web_store_file): + return + + return open(chrome_web_store_file, 'rb') + + +class ChromeExtensionTest(test_lib.AnalysisPluginTestCase): + """Tests for the chrome extension analysis plugin.""" + + # Few config options here. + MAC_PATHS = [ + '/Users/dude/Libary/Application Data/Google/Chrome/Default/Extensions', + ('/Users/dude/Libary/Application Data/Google/Chrome/Default/Extensions/' + 'apdfllckaahabafndbhieahigkjlhalf'), + '/private/var/log/system.log', + '/Users/frank/Library/Application Data/Google/Chrome/Default', + '/Users/hans/Library/Application Data/Google/Chrome/Default', + ('/Users/frank/Library/Application Data/Google/Chrome/Default/' + 'Extensions/pjkljhegncpnkpknbcohdijeoejaedia'), + '/Users/frank/Library/Application Data/Google/Chrome/Default/Extensions',] + + WIN_PATHS = [ + 'C:\\Users\\Dude\\SomeFolder\\Chrome\\Default\\Extensions', + ('C:\\Users\\Dude\\SomeNoneStandardFolder\\Chrome\\Default\\Extensions\\' + 'hmjkmjkepdijhoojdojkdfohbdgmmhki'), + ('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\' + 'blpcfgokakmgnkcojhhkbfbldkacnbeo'), + '\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions', + ('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\' + 'icppfcnhkcmnfdhfhphakoifcfokfdhg'), + 'C:\\Windows\\System32', + '\\Stuff/with path separator\\Folder'] + + MAC_USERS = [ + {u'name': u'root', u'path': u'/var/root', u'sid': u'0'}, + {u'name': u'frank', u'path': u'/Users/frank', u'sid': u'4052'}, + {u'name': u'hans', u'path': u'/Users/hans', u'sid': u'4352'}, + {u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}] + + WIN_USERS = [ + {u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'}, + {u'name': u'frank', u'path': u'C:\\Users\\frank', u'sid': u'S-2'}] + + def _CreateTestEventObject(self, path): + """Create a test event object with a particular path.""" + event_object = event.EventObject() + event_object.data_type = 'fs:stat' + event_object.timestamp = 12345 + event_object.timestamp_desc = u'Some stuff' + event_object.filename = path + + return event_object + + def testMacAnalyzerPlugin(self): + """Test the plugin against mock events.""" + knowledge_base = self._SetUpKnowledgeBase(knowledge_base_values={ + 'users': self.MAC_USERS}) + + event_queue = single_process.SingleProcessQueue() + + # Fill the incoming queue with events. 
+ test_queue_producer = queue.ItemQueueProducer(event_queue) + test_queue_producer.ProduceItems([ + self._CreateTestEventObject(path) for path in self.MAC_PATHS]) + test_queue_producer.SignalEndOfInput() + + # Initialize plugin. + analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue) + + # Run the analysis plugin. + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + self.assertEquals(len(analysis_reports), 1) + + analysis_report = analysis_reports[0] + + self.assertEquals(analysis_plugin._sep, u'/') + + # Due to the behavior of the join one additional empty string at the end + # is needed to create the last empty line. + expected_text = u'\n'.join([ + u' == USER: dude ==', + u' Google Drive [apdfllckaahabafndbhieahigkjlhalf]', + u'', + u' == USER: frank ==', + u' Gmail [pjkljhegncpnkpknbcohdijeoejaedia]', + u'', + u'']) + + self.assertEquals(analysis_report.text, expected_text) + self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test') + + expected_keys = set([u'frank', u'dude']) + self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys) + + def testWinAnalyzePlugin(self): + """Test the plugin against mock events.""" + knowledge_base = self._SetUpKnowledgeBase(knowledge_base_values={ + 'users': self.WIN_USERS}) + + event_queue = single_process.SingleProcessQueue() + + # Fill the incoming queue with events. + test_queue_producer = queue.ItemQueueProducer(event_queue) + test_queue_producer.ProduceItems([ + self._CreateTestEventObject(path) for path in self.WIN_PATHS]) + test_queue_producer.SignalEndOfInput() + + # Initialize plugin. + analysis_plugin = AnalyzeChromeExtensionTestPlugin(event_queue) + + # Run the analysis plugin. + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + self.assertEquals(len(analysis_reports), 1) + + analysis_report = analysis_reports[0] + + self.assertEquals(analysis_plugin._sep, u'\\') + + # Due to the behavior of the join one additional empty string at the end + # is needed to create the last empty line. + expected_text = u'\n'.join([ + u' == USER: dude ==', + u' Google Keep - notes and lists [hmjkmjkepdijhoojdojkdfohbdgmmhki]', + u'', + u' == USER: frank ==', + u' Google Play Music [icppfcnhkcmnfdhfhphakoifcfokfdhg]', + u' YouTube [blpcfgokakmgnkcojhhkbfbldkacnbeo]', + u'', + u'']) + + self.assertEquals(analysis_report.text, expected_text) + self.assertEquals(analysis_report.plugin_name, 'chrome_extension_test') + + expected_keys = set([u'frank', u'dude']) + self.assertEquals(set(analysis_report.report_dict.keys()), expected_keys) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/analysis/context.py b/plaso/analysis/context.py new file mode 100644 index 0000000..237e584 --- /dev/null +++ b/plaso/analysis/context.py @@ -0,0 +1,168 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The analysis context object."""
+
+
+class AnalysisContext(object):
+ """Class that implements the analysis context."""
+
+ def __init__(self, analysis_report_queue_producer, knowledge_base):
+ """Initializes an analysis plugin context object.
+
+ Args:
+ analysis_report_queue_producer: the analysis report queue producer
+ (instance of ItemQueueProducer).
+ knowledge_base: A knowledge base object (instance of KnowledgeBase),
+ which contains information from the source data needed
+ for analysis.
+ """
+ super(AnalysisContext, self).__init__()
+ self._analysis_report_queue_producer = analysis_report_queue_producer
+ self._knowledge_base = knowledge_base
+
+ self.number_of_produced_analysis_reports = 0
+
+ @property
+ def users(self):
+ """The list of users."""
+ return self._knowledge_base.users
+
+ def GetPathSegmentSeparator(self, path):
+ """Given a path, returns the path segment separator as a best guess.
+
+ Args:
+ path: the path.
+
+ Returns:
+ The path segment separator.
+ """
+ if path.startswith(u'\\') or path[1:].startswith(u':\\'):
+ return u'\\'
+
+ if path.startswith(u'/'):
+ return u'/'
+
+ if u'/' in path and u'\\' in path:
+ # Let's count slashes and guess which one is the right one.
+ forward_count = len(path.split(u'/'))
+ backward_count = len(path.split(u'\\'))
+
+ if forward_count > backward_count:
+ return u'/'
+ else:
+ return u'\\'
+
+ # Now we are sure there is only one type of separator, yet
+ # the path does not start with one.
+ if u'/' in path:
+ return u'/'
+ else:
+ return u'\\'
+
+ def GetUsernameFromPath(self, user_paths, file_path, path_segment_separator):
+ """Returns a username based on preprocessing and the path.
+
+ During preprocessing the tool gathers the paths where each user profile
+ is stored and which user each profile belongs to. This function takes in
+ a path to a file and compares it to all discovered usernames and the
+ paths to their profiles on the system. If the file path belongs to a
+ user profile, the username that the profile belongs to is returned.
+
+ Args:
+ user_paths: A dictionary object containing the paths per username.
+ file_path: The full path to the file being analyzed.
+ path_segment_separator: String containing the path segment separator.
+
+ Returns:
+ If possible the responsible username behind the file. Otherwise None.
+ """
+ if not user_paths:
+ return
+
+ if path_segment_separator != u'/':
+ use_path = file_path.replace(path_segment_separator, u'/')
+ else:
+ use_path = file_path
+
+ if use_path[1:].startswith(u':/'):
+ use_path = use_path[2:]
+
+ use_path = use_path.lower()
+
+ for user, path in user_paths.iteritems():
+ if use_path.startswith(path):
+ return user
+
+ def GetUserPaths(self, users):
+ """Retrieves the user paths.
+
+ Args:
+ users: a list of users.
+
+ Returns:
+ A dictionary object containing the paths per username or None if no users.
+ """ + if not users: + return + + user_paths = {} + + user_separator = None + for user in users: + name = user.get('name') + path = user.get('path') + + if not path or not name: + continue + + if not user_separator: + user_separator = self.GetPathSegmentSeparator(path) + + if user_separator != u'/': + path = path.replace(user_separator, u'/').replace(u'//', u'/') + + if path[1:].startswith(u':/'): + path = path[2:] + + name = name.lower() + user_paths[name] = path.lower() + + return user_paths + + def ProcessAnalysisReport(self, analysis_report, plugin_name=None): + """Processes an analysis report before it is emitted to the queue. + + Args: + analysis_report: the analysis report object (instance of AnalysisReport). + plugin_name: Optional name of the plugin. The default is None. + """ + if not getattr(analysis_report, 'plugin_name', None) and plugin_name: + analysis_report.plugin_name = plugin_name + + def ProduceAnalysisReport(self, analysis_report, plugin_name=None): + """Produces an analysis report onto the queue. + + Args: + analysis_report: the analysis report object (instance of AnalysisReport). + plugin_name: Optional name of the plugin. The default is None. + """ + self.ProcessAnalysisReport(analysis_report, plugin_name=plugin_name) + + self._analysis_report_queue_producer.ProduceItem(analysis_report) + self.number_of_produced_analysis_reports += 1 diff --git a/plaso/analysis/context_test.py b/plaso/analysis/context_test.py new file mode 100644 index 0000000..9c67978 --- /dev/null +++ b/plaso/analysis/context_test.py @@ -0,0 +1,134 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
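As a quick illustration of how the AnalysisContext helpers above fit together, here is a short usage sketch (the user entry and file path are made-up examples): GetUserPaths normalizes each profile path once into a lower-case, forward-slash form, and GetUsernameFromPath then matches candidate file paths against that table.

from plaso.analysis import context
from plaso.artifacts import knowledge_base
from plaso.engine import queue
from plaso.engine import single_process

users = [{u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'}]

analysis_report_queue = single_process.SingleProcessQueue()
analysis_context = context.AnalysisContext(
    queue.ItemQueueProducer(analysis_report_queue),
    knowledge_base.KnowledgeBase())

# The drive letter is stripped and the path lower-cased:
# {u'dude': u'/users/dude'}
user_paths = analysis_context.GetUserPaths(users)

# Prints: dude
print analysis_context.GetUsernameFromPath(
    user_paths, u'C:\\Users\\dude\\NTUSER.DAT', u'\\')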
+"""Tests for the analysis context.""" + +import unittest + +from plaso.analysis import context +from plaso.analysis import test_lib +from plaso.engine import queue +from plaso.engine import single_process + + +class AnalysisContextTest(test_lib.AnalysisPluginTestCase): + """Tests for the analysis context.""" + + MAC_PATHS = [ + '/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions', + ('/Users/dude/Library/Application Data/Google/Chrome/Default/Extensions/' + 'apdfllckaahabafndbhieahigkjlhalf'), + '/private/var/log/system.log', + '/Users/frank/Library/Application Data/Google/Chrome/Default', + '/Users/hans/Library/Application Data/Google/Chrome/Default', + ('/Users/frank/Library/Application Data/Google/Chrome/Default/' + 'Extensions/pjkljhegncpnkpknbcohdijeoejaedia'), + '/Users/frank/Library/Application Data/Google/Chrome/Default/Extensions',] + + WIN_PATHS = [ + 'C:\\Users\\Dude\\SomeFolder\\Chrome\\Default\\Extensions', + ('C:\\Users\\Dude\\SomeNoneStandardFolder\\Chrome\\Default\\Extensions\\' + 'hmjkmjkepdijhoojdojkdfohbdgmmhki'), + ('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\' + 'blpcfgokakmgnkcojhhkbfbldkacnbeo'), + '\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions', + ('\\Users\\frank\\AppData\\Local\\Google\\Chrome\\Extensions\\' + 'icppfcnhkcmnfdhfhphakoifcfokfdhg'), + 'C:\\Windows\\System32', + '\\Stuff/with path separator\\Folder'] + + MAC_USERS = [ + {u'name': u'root', u'path': u'/var/root', u'sid': u'0'}, + {u'name': u'frank', u'path': u'/Users/frank', u'sid': u'4052'}, + {u'name': u'hans', u'path': u'/Users/hans', u'sid': u'4352'}, + {u'name': u'dude', u'path': u'/Users/dude', u'sid': u'1123'}] + + WIN_USERS = [ + {u'name': u'dude', u'path': u'C:\\Users\\dude', u'sid': u'S-1'}, + {u'name': u'frank', u'path': u'C:\\Users\\frank', u'sid': u'S-2'}] + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + knowledge_base = self._SetUpKnowledgeBase() + + analysis_report_queue = single_process.SingleProcessQueue() + analysis_report_queue_producer = queue.ItemQueueProducer( + analysis_report_queue) + + self._analysis_context = context.AnalysisContext( + analysis_report_queue_producer, knowledge_base) + + def testGetPathSegmentSeparator(self): + """Tests the GetPathSegmentSeparator function.""" + for path in self.MAC_PATHS: + path_segment_separator = self._analysis_context.GetPathSegmentSeparator( + path) + self.assertEquals(path_segment_separator, u'/') + + for path in self.WIN_PATHS: + path_segment_separator = self._analysis_context.GetPathSegmentSeparator( + path) + self.assertEquals(path_segment_separator, u'\\') + + def testGetUserPaths(self): + """Tests the GetUserPaths function.""" + user_paths = self._analysis_context.GetUserPaths(self.MAC_USERS) + self.assertEquals( + set(user_paths.keys()), set([u'frank', u'dude', u'hans', u'root'])) + self.assertEquals(user_paths[u'frank'], u'/users/frank') + self.assertEquals(user_paths[u'dude'], u'/users/dude') + self.assertEquals(user_paths[u'hans'], u'/users/hans') + self.assertEquals(user_paths[u'root'], u'/var/root') + + user_paths = self._analysis_context.GetUserPaths(self.WIN_USERS) + self.assertEquals(set(user_paths.keys()), set([u'frank', u'dude'])) + self.assertEquals(user_paths[u'frank'], u'/users/frank') + self.assertEquals(user_paths[u'dude'], u'/users/dude') + + def testGetUsernameFromPath(self): + """Tests the GetUsernameFromPath function.""" + user_paths = self._analysis_context.GetUserPaths(self.MAC_USERS) + + username = 
self._analysis_context.GetUsernameFromPath( + user_paths, self.MAC_PATHS[0], u'/') + self.assertEquals(username, u'dude') + + username = self._analysis_context.GetUsernameFromPath( + user_paths, self.MAC_PATHS[4], u'/') + self.assertEquals(username, u'hans') + + username = self._analysis_context.GetUsernameFromPath( + user_paths, self.WIN_PATHS[0], u'/') + self.assertEquals(username, None) + + user_paths = self._analysis_context.GetUserPaths(self.WIN_USERS) + + username = self._analysis_context.GetUsernameFromPath( + user_paths, self.WIN_PATHS[0], u'\\') + self.assertEquals(username, u'dude') + + username = self._analysis_context.GetUsernameFromPath( + user_paths, self.WIN_PATHS[2], u'\\') + self.assertEquals(username, u'frank') + + username = self._analysis_context.GetUsernameFromPath( + user_paths, self.MAC_PATHS[2], u'\\') + self.assertEquals(username, None) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/analysis/interface.py b/plaso/analysis/interface.py new file mode 100644 index 0000000..1b447b5 --- /dev/null +++ b/plaso/analysis/interface.py @@ -0,0 +1,139 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains basic interface for analysis plugins.""" + +import abc + +from plaso.engine import queue +from plaso.lib import registry +from plaso.lib import timelib + + +class AnalysisPlugin(queue.EventObjectQueueConsumer): + """Analysis plugin gets a copy of each read event for analysis.""" + + __metaclass__ = registry.MetaclassRegistry + __abstract = True + + # The URLS should contain a list of URLs with additional information about + # this analysis plugin. + URLS = [] + + # The name of the plugin. This is the name that is matched against when + # loading plugins, so it is important that this name is short, concise and + # explains the nature of the plugin easily. It also needs to be unique. + NAME = 'Plugin' + + # A flag indicating whether or not this plugin should be run during extraction + # phase or reserved entirely for post processing stage. + # Typically this would mean that the plugin is perhaps too computationally + # heavy to be run during event extraction and should rather be run during + # post-processing. + # Since most plugins should perhaps rather be run during post-processing + # this is set to False by default and needs to be overwritten if the plugin + # should be able to run during the extraction phase. + ENABLE_IN_EXTRACTION = False + + # All the possible report types. + TYPE_ANOMALY = 1 # Plugin that is inspecting events for anomalies. + TYPE_STATISTICS = 2 # Statistical calculations. + TYPE_ANNOTATION = 3 # Inspecting events with the primary purpose of + # annotating or tagging them. + TYPE_REPORT = 4 # Inspecting events to provide a summary information. + + # Optional arguments to be added to the argument parser. 
+ # An example would be: + # ARGUMENTS = [('--myparameter', { + # 'action': 'store', + # 'help': 'This is my parameter help', + # 'dest': 'myparameter', + # 'default': '', + # 'type': 'unicode'})] + # + # Where all arguments into the dict object have a direct translation + # into the argparse parser. + ARGUMENTS = [] + + # We need to implement the interface for analysis plugins, but we don't use + # command line options here, so disable checking for unused args. + # pylint: disable=unused-argument + def __init__(self, incoming_queue, options=None): + """Initializes an analysis plugin. + + Args: + incoming_queue: A queue that is used to listen to incoming events. + options: Optional command line arguments (instance of + argparse.Namespace). The default is None. + """ + super(AnalysisPlugin, self).__init__(incoming_queue) + self.plugin_type = self.TYPE_REPORT + + # pylint: enable=unused-argument + def _ConsumeEventObject(self, event_object, analysis_context=None, **kwargs): + """Consumes an event object callback for ConsumeEventObjects. + + Args: + event_object: An event object (instance of EventObject). + analysis_context: Optional analysis context object (instance of + AnalysisContext). The default is None. + """ + self.ExamineEvent(analysis_context, event_object, **kwargs) + + @property + def plugin_name(self): + """Return the name of the plugin.""" + return self.NAME + + @abc.abstractmethod + def CompileReport(self): + """Compiles a report of the analysis. + + After the plugin has received every copy of an event to + analyze this function will be called so that the report + can be assembled. + + Returns: + The analysis report (instance of AnalysisReport). + """ + + @abc.abstractmethod + def ExamineEvent(self, analysis_context, event_object, **kwargs): + """Analyzes an event object. + + Args: + analysis_context: An analysis context object (instance of + AnalysisContext). + event_object: An event object (instance of EventObject). + """ + + def RunPlugin(self, analysis_context): + """For each item in the queue send the read event to analysis. + + Args: + analysis_context: An analysis context object (instance of + AnalysisContext). + """ + self.ConsumeEventObjects(analysis_context=analysis_context) + + analysis_report = self.CompileReport() + + if analysis_report: + # TODO: move this into the plugins? + analysis_report.time_compiled = timelib.Timestamp.GetNow() + analysis_context.ProduceAnalysisReport( + analysis_report, plugin_name=self.plugin_name) diff --git a/plaso/analysis/test_lib.py b/plaso/analysis/test_lib.py new file mode 100644 index 0000000..1bb30a6 --- /dev/null +++ b/plaso/analysis/test_lib.py @@ -0,0 +1,171 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
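To make the plugin contract above concrete, here is a sketch of a minimal concrete subclass; the plugin and its per-data-type counting are hypothetical examples, not part of plaso. Only ExamineEvent and CompileReport need to be provided; RunPlugin drives both.

from plaso.analysis import interface
from plaso.lib import event


class EventCountingPlugin(interface.AnalysisPlugin):
  """Hypothetical plugin that counts events per data type."""

  NAME = 'event_counting'

  def __init__(self, incoming_queue, options=None):
    """Initializes the event counting analysis plugin."""
    super(EventCountingPlugin, self).__init__(incoming_queue)
    self._counts = {}

  def ExamineEvent(self, analysis_context, event_object, **kwargs):
    """Counts the data type of each event object."""
    data_type = getattr(event_object, 'data_type', u'unknown')
    self._counts[data_type] = self._counts.get(data_type, 0) + 1

  def CompileReport(self):
    """Compiles the counts into an analysis report."""
    report = event.AnalysisReport()
    report.SetText([
        u'{0:s}: {1:d}'.format(data_type, count)
        for data_type, count in sorted(self._counts.items())])
    return report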
+"""Analysis plugin related functions and classes for testing.""" + +import os +import unittest + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.analysis import context +from plaso.artifacts import knowledge_base +from plaso.engine import queue +from plaso.engine import single_process +from plaso.lib import event +from plaso.parsers import context as parsers_context + + +class TestAnalysisReportQueueConsumer(queue.ItemQueueConsumer): + """Class that implements a test analysis report queue consumer.""" + + def __init__(self, queue_object): + """Initializes the queue consumer. + + Args: + queue_object: the queue object (instance of Queue). + """ + super(TestAnalysisReportQueueConsumer, self).__init__(queue_object) + self.analysis_reports = [] + + def _ConsumeItem(self, analysis_report): + """Consumes an item callback for ConsumeItems. + + Args: + analysis_report: the analysis report (instance of AnalysisReport). + """ + self.analysis_reports.append(analysis_report) + + @property + def number_of_analysis_reports(self): + """The number of analysis reports.""" + return len(self.analysis_reports) + + +class AnalysisPluginTestCase(unittest.TestCase): + """The unit test case for an analysis plugin.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + maxDiff = None + + def _GetAnalysisReportsFromQueue(self, analysis_report_queue_consumer): + """Retrieves the analysis reports from the queue consumer. + + Args: + analysis_report_queue_consumer: the analysis report queue consumer + object (instance of + TestAnalysisReportQueueConsumer). + + Returns: + A list of analysis reports (instances of AnalysisReport). + """ + analysis_report_queue_consumer.ConsumeItems() + + analysis_reports = [] + for analysis_report in analysis_report_queue_consumer.analysis_reports: + self.assertIsInstance(analysis_report, event.AnalysisReport) + analysis_reports.append(analysis_report) + + return analysis_reports + + def _GetTestFilePath(self, path_segments): + """Retrieves the path of a test file relative to the test data directory. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A path of the test file. + """ + # Note that we need to pass the individual path segments to os.path.join + # and not a list. + return os.path.join(self._TEST_DATA_PATH, *path_segments) + + def _ParseFile(self, parser_object, path, knowledge_base_object): + """Parses a file using the parser object. + + Args: + parser_object: the parser object. + path: the path of the file to parse. + knowledge_base_object: the knowledge base object (instance of + KnowledgeBase). + + Returns: + An event object queue object (instance of Queue). 
+
+ """
+ event_queue = single_process.SingleProcessQueue()
+ event_queue_producer = queue.ItemQueueProducer(event_queue)
+
+ parse_error_queue = single_process.SingleProcessQueue()
+
+ parser_context = parsers_context.ParserContext(
+ event_queue_producer, parse_error_queue, knowledge_base_object)
+ path_spec = path_spec_factory.Factory.NewPathSpec(
+ definitions.TYPE_INDICATOR_OS, location=path)
+ file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+ parser_object.Parse(parser_context, file_entry)
+ event_queue.SignalEndOfInput()
+
+ return event_queue
+
+ def _RunAnalysisPlugin(self, analysis_plugin, knowledge_base_object):
+ """Analyzes an event object queue using the plugin object.
+
+ Args:
+ analysis_plugin: the analysis plugin object (instance of AnalysisPlugin).
+ knowledge_base_object: the knowledge base object (instance of
+ KnowledgeBase).
+
+ Returns:
+ The analysis report queue consumer object (instance of
+ TestAnalysisReportQueueConsumer).
+ """
+ analysis_report_queue = single_process.SingleProcessQueue()
+ analysis_report_queue_consumer = TestAnalysisReportQueueConsumer(
+ analysis_report_queue)
+ analysis_report_queue_producer = queue.ItemQueueProducer(
+ analysis_report_queue)
+
+ analysis_context = context.AnalysisContext(
+ analysis_report_queue_producer, knowledge_base_object)
+
+ analysis_plugin.RunPlugin(analysis_context)
+ analysis_report_queue.SignalEndOfInput()
+
+ return analysis_report_queue_consumer
+
+ def _SetUpKnowledgeBase(self, knowledge_base_values=None):
+ """Sets up a knowledge base.
+
+ Args:
+ knowledge_base_values: optional dict containing the knowledge base
+ values. The default is None.
+
+ Returns:
+ A knowledge base object (instance of KnowledgeBase).
+ """
+ knowledge_base_object = knowledge_base.KnowledgeBase()
+ if knowledge_base_values:
+ for identifier, value in knowledge_base_values.iteritems():
+ knowledge_base_object.SetValue(identifier, value)
+
+ return knowledge_base_object
diff --git a/plaso/analysis/windows_services.py b/plaso/analysis/windows_services.py
new file mode 100644
index 0000000..aa42d90
--- /dev/null
+++ b/plaso/analysis/windows_services.py
@@ -0,0 +1,267 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A plugin to enable quick triage of Windows Services."""
+
+from plaso.analysis import interface
+from plaso.lib import event
+from plaso.winnt import human_readable_service_enums
+
+# Moving this import to the bottom due to complaints from certain versions of
+# linters.
+import yaml
+
+
+class WindowsService(yaml.YAMLObject):
+ """Class to represent a Windows Service."""
+ # This is used for comparison operations and defines attributes that should
+ # not be used during evaluation of whether two services are the same.
+ COMPARE_EXCLUDE = frozenset(['sources']) + + KEY_PATH_SEPARATOR = u'\\' + + # YAML attributes + yaml_tag = u'!WindowsService' + yaml_loader = yaml.SafeLoader + yaml_dumper = yaml.SafeDumper + + + def __init__(self, name, service_type, image_path, start_type, object_name, + source, service_dll=None): + """Initializes a new Windows service object. + + Args: + name: The name of the service + service_type: The value of the Type value of the service key. + image_path: The value of the ImagePath value of the service key. + start_type: The value of the Start value of the service key. + object_name: The value of the ObjectName value of the service key. + source: A tuple of (pathspec, Registry key) describing where the + service was found + service_dll: Optional string value of the ServiceDll value in the + service's Parameters subkey. The default is None. + + Raises: + TypeError: If a tuple with two elements is not passed as the 'source' + argument. + """ + self.name = name + self.service_type = service_type + self.image_path = image_path + self.start_type = start_type + self.service_dll = service_dll + self.object_name = object_name + if isinstance(source, tuple): + if len(source) != 2: + raise TypeError(u'Source arguments must be tuple of length 2.') + # A service may be found in multiple Control Sets or Registry hives, + # hence the list. + self.sources = [source] + else: + raise TypeError(u'Source argument must be a tuple.') + self.anomalies = [] + + @classmethod + def FromEvent(cls, service_event): + """Creates a Service object from an plaso event. + + Args: + service_event: The event object (instance of EventObject) to create a new + Service object from. + + """ + _, _, name = service_event.keyname.rpartition( + WindowsService.KEY_PATH_SEPARATOR) + service_type = service_event.regvalue.get('Type') + image_path = service_event.regvalue.get('ImagePath') + start_type = service_event.regvalue.get('Start') + service_dll = service_event.regvalue.get('ServiceDll', u'') + object_name = service_event.regvalue.get('ObjectName', u'') + if service_event.pathspec: + source = (service_event.pathspec.location, service_event.keyname) + else: + source = (u'Unknown', u'Unknown') + return cls( + name=name, service_type=service_type, image_path=image_path, + start_type=start_type, object_name=object_name, + source=source, service_dll=service_dll) + + def HumanReadableType(self): + """Return a human readable string describing the type value.""" + return human_readable_service_enums.SERVICE_ENUMS['Type'].get( + self.service_type, u'{0:d}'.format(self.service_type)) + + def HumanReadableStartType(self): + """Return a human readable string describing the start_type value.""" + return human_readable_service_enums.SERVICE_ENUMS['Start'].get( + self.start_type, u'{0:d}'.format(self.start_type)) + + def __eq__(self, other_service): + """Custom equality method so that we match near-duplicates. + + Compares two service objects together and evaluates if they are + the same or close enough to be considered to represent the same service. + + For two service objects to be considered the same they need to + have the the same set of attributes and same values for all their + attributes, other than those enumerated as reserved in the + COMPARE_EXCLUDE constant. + + Args: + other_service: The service (instance of WindowsService) we are testing + for equality. + + Returns: + A boolean value to indicate whether the services are equal. 
+
+ """
+ if not isinstance(other_service, WindowsService):
+ return False
+
+ attributes = set(self.__dict__.keys())
+ other_attributes = set(other_service.__dict__.keys())
+
+ if attributes != other_attributes:
+ return False
+
+ # We compare the values for all attributes, other than those specifically
+ # enumerated as not relevant for equality comparisons.
+ for attribute in attributes.difference(self.COMPARE_EXCLUDE):
+ if getattr(self, attribute, None) != getattr(
+ other_service, attribute, None):
+ return False
+
+ return True
+
+
+class WindowsServiceCollection(object):
+ """Class to hold and de-duplicate Windows Services."""
+
+ def __init__(self):
+ """Initializes a collection that holds Windows Services."""
+ self._services = []
+
+ def AddService(self, new_service):
+ """Add a new service to the list of ones we know about.
+
+ Args:
+ new_service: The service (instance of WindowsService) to add.
+ """
+ for service in self._services:
+ if new_service == service:
+ # If this service is the same as one we already know about, we
+ # just want to add where it came from.
+ service.sources.append(new_service.sources[0])
+ return
+ # We only add a new object to our list if we don't have
+ # an identical one already.
+ self._services.append(new_service)
+
+ @property
+ def services(self):
+ """Get the services in this collection."""
+ return self._services
+
+
+class AnalyzeWindowsServicesPlugin(interface.AnalysisPlugin):
+ """Provides a single list of the Windows services found in the Registry."""
+
+ NAME = 'windows_services'
+
+ # Indicate that we can run this plugin during regular extraction.
+ ENABLE_IN_EXTRACTION = True
+
+ ARGUMENTS = [
+ ('--windows-services-output', {
+ 'dest': 'windows-services-output',
+ 'type': unicode,
+ 'help': 'Specify how the results should be displayed. Options are '
+ 'text and yaml.',
+ 'action': 'store',
+ 'default': u'text',
+ 'choices': [u'text', u'yaml']}),]
+
+ def __init__(self, incoming_queue, options=None):
+ """Initializes the Windows Services plugin.
+
+ Args:
+ incoming_queue: A queue to read events from.
+ options: Optional command line arguments (instance of
+ argparse.Namespace). The default is None.
+ """
+ super(AnalyzeWindowsServicesPlugin, self).__init__(incoming_queue)
+ self._service_collection = WindowsServiceCollection()
+ self.plugin_type = interface.AnalysisPlugin.TYPE_REPORT
+ self._output_mode = getattr(options, 'windows-services-output', u'text')
+
+ def ExamineEvent(self, analysis_context, event_object, **kwargs):
+ """Analyzes an event_object and creates Windows Services as required.
+
+ At present, this method only handles events extracted from the Registry.
+
+ Args:
+ analysis_context: The context object for analysis plugins.
+ event_object: The event object (instance of EventObject) to examine.
+ """
+ # TODO: Handle event log entries here also (ie, event id 4697).
+ if getattr(event_object, 'data_type', None) != 'windows:registry:service':
+ return
+
+ # Create and store the service.
+ service = WindowsService.FromEvent(event_object)
+ self._service_collection.AddService(service)
+
+ def _FormatServiceText(self, service):
+ """Produces a human readable multi-line string representing the service.
+
+ Args:
+ service: The service (instance of WindowsService) to format.
+
+ Returns:
+ A human readable, multi-line string representing the service.
+ """ + string_segments = [ + service.name, + u'\tImage Path = {0:s}'.format(service.image_path), + u'\tService Type = {0:s}'.format(service.HumanReadableType()), + u'\tStart Type = {0:s}'.format(service.HumanReadableStartType()), + u'\tService Dll = {0:s}'.format(service.service_dll), + u'\tObject Name = {0:s}'.format(service.object_name), + u'\tSources:'] + for source in service.sources: + string_segments.append(u'\t\t{0:s}:{1:s}'.format(source[0], source[1])) + return u'\n'.join(string_segments) + + def CompileReport(self): + """Compiles a report of the analysis. + + Returns: + The analysis report (instance of AnalysisReport). + """ + report = event.AnalysisReport() + + if self._output_mode == 'yaml': + lines_of_text = [] + lines_of_text.append( + yaml.safe_dump_all(self._service_collection.services)) + else: + lines_of_text = ['Listing Windows Services'] + for service in self._service_collection.services: + lines_of_text.append(self._FormatServiceText(service)) + # Separate services with a blank line. + lines_of_text.append(u'') + + report.SetText(lines_of_text) + + return report diff --git a/plaso/analysis/windows_services_test.py b/plaso/analysis/windows_services_test.py new file mode 100644 index 0000000..1ee923f --- /dev/null +++ b/plaso/analysis/windows_services_test.py @@ -0,0 +1,192 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the windows services analysis plugin.""" + +import argparse +import unittest + +from dfvfs.path import fake_path_spec + +from plaso.analysis import test_lib +from plaso.analysis import windows_services +from plaso.engine import queue +from plaso.engine import single_process +from plaso.events import windows_events +from plaso.parsers import winreg + + +class WindowsServicesTest(test_lib.AnalysisPluginTestCase): + """Tests for the Windows Services analysis plugin.""" + + SERVICE_EVENTS = [ + {u'path': u'\\ControlSet001\\services\\TestbDriver', + u'text_dict': {u'ImagePath': u'C:\\Dell\\testdriver.sys', u'Type': 2, + u'Start': 2, u'ObjectName': u''}, + u'timestamp': 1346145829002031}, + # This is almost the same, but different timestamp and source, so that + # we can test the service de-duplication. + {u'path': u'\\ControlSet003\\services\\TestbDriver', + u'text_dict': {u'ImagePath': u'C:\\Dell\\testdriver.sys', u'Type': 2, + u'Start': 2, u'ObjectName': u''}, + u'timestamp': 1346145839002031}, + ] + + def _CreateAnalysisPlugin(self, input_queue, output_mode): + """Create an analysis plugin to test with. + + Args: + input_queue: A queue the plugin will read events from. + output_mode: The output format the plugin will use. + Valid options are 'text' and 'yaml'. + + Returns: + An instance of AnalyzeWindowsServicesPlugin. 
+ """ + argument_parser = argparse.ArgumentParser() + plugin_args = windows_services.AnalyzeWindowsServicesPlugin.ARGUMENTS + for parameter, config in plugin_args: + argument_parser.add_argument(parameter, **config) + arguments = ['--windows-services-output', output_mode] + options = argument_parser.parse_args(arguments) + analysis_plugin = windows_services.AnalyzeWindowsServicesPlugin( + input_queue, options) + return analysis_plugin + + + def _CreateTestEventObject(self, service_event): + """Create a test event object with a particular path. + + Args: + service_event: A hash containing attributes of an event to add to the + queue. + + Returns: + An EventObject representing the service to be created. + """ + test_pathspec = fake_path_spec.FakePathSpec( + location=u'C:\\WINDOWS\\system32\\SYSTEM') + event_object = windows_events.WindowsRegistryServiceEvent( + service_event[u'timestamp'], service_event[u'path'], + service_event[u'text_dict']) + event_object.pathspec = test_pathspec + return event_object + + def testSyntheticKeysText(self): + """Test the plugin against mock events.""" + event_queue = single_process.SingleProcessQueue() + + # Fill the incoming queue with events. + test_queue_producer = queue.ItemQueueProducer(event_queue) + events = [self._CreateTestEventObject(service_event) + for service_event + in self.SERVICE_EVENTS] + test_queue_producer.ProduceItems(events) + test_queue_producer.SignalEndOfInput() + + # Initialize plugin. + analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text') + + # Run the analysis plugin. + knowledge_base = self._SetUpKnowledgeBase() + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + self.assertEquals(len(analysis_reports), 1) + + analysis_report = analysis_reports[0] + + expected_text = ( + u'Listing Windows Services\n' + u'TestbDriver\n' + u'\tImage Path = C:\\Dell\\testdriver.sys\n' + u'\tService Type = File System Driver (0x2)\n' + u'\tStart Type = Auto Start (2)\n' + u'\tService Dll = \n' + u'\tObject Name = \n' + u'\tSources:\n' + u'\t\tC:\\WINDOWS\\system32\\SYSTEM:' + u'\\ControlSet001\\services\\TestbDriver\n' + u'\t\tC:\\WINDOWS\\system32\\SYSTEM:' + u'\\ControlSet003\\services\\TestbDriver\n\n') + + self.assertEquals(expected_text, analysis_report.text) + self.assertEquals(analysis_report.plugin_name, 'windows_services') + + def testRealEvents(self): + """Test the plugin with text output against real events from the parser.""" + parser = winreg.WinRegistryParser() + # We could remove the non-Services plugins, but testing shows that the + # performance gain is negligible. + + knowledge_base = self._SetUpKnowledgeBase() + test_path = self._GetTestFilePath(['SYSTEM']) + event_queue = self._ParseFile(parser, test_path, knowledge_base) + + # Run the analysis plugin. + analysis_plugin = self._CreateAnalysisPlugin(event_queue, u'text') + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + report = analysis_reports[0] + text = report.text + + # We'll check that a few strings are in the report, like they're supposed + # to be, rather than checking for the exact content of the string, + # as that's dependent on the full path to the test files. 
+ test_strings = [u'1394ohci', u'WwanSvc', u'Sources:', u'ControlSet001', + u'ControlSet002'] + for string in test_strings: + self.assertTrue(string in text) + + def testRealEventsYAML(self): + """Test the plugin with YAML output against real events from the parser.""" + parser = winreg.WinRegistryParser() + # We could remove the non-Services plugins, but testing shows that the + # performance gain is negligible. + + knowledge_base = self._SetUpKnowledgeBase() + test_path = self._GetTestFilePath(['SYSTEM']) + event_queue = self._ParseFile(parser, test_path, knowledge_base) + + # Run the analysis plugin. + analysis_plugin = self._CreateAnalysisPlugin(event_queue, 'yaml') + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + report = analysis_reports[0] + text = report.text + + # We'll check that a few strings are in the report, like they're supposed + # to be, rather than checking for the exact content of the string, + # as that's dependent on the full path to the test files. + test_strings = [windows_services.WindowsService.yaml_tag, u'1394ohci', + u'WwanSvc', u'ControlSet001', u'ControlSet002'] + + for string in test_strings: + self.assertTrue(string in text, u'{0:s} not found in report text'.format( + string)) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/artifacts/__init__.py b/plaso/artifacts/__init__.py new file mode 100644 index 0000000..f462564 --- /dev/null +++ b/plaso/artifacts/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/artifacts/knowledge_base.py b/plaso/artifacts/knowledge_base.py new file mode 100644 index 0000000..59a91a6 --- /dev/null +++ b/plaso/artifacts/knowledge_base.py @@ -0,0 +1,137 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The artifact knowledge base object. + +The knowledge base is filled by user provided input and the pre-processing +phase. It is intended to provide successive phases, like the parsing and +analysis phases, with essential information like e.g. the timezone and +codepage of the source data. 
+""" + +from plaso.lib import event + +import pytz + + +class KnowledgeBase(object): + """Class that implements the artifact knowledge base.""" + + def __init__(self): + """Initialize the knowledge base object.""" + super(KnowledgeBase, self).__init__() + + # TODO: the first versions of the knowledge base will wrap the pre-process + # object, but this should be replaced by an artifact style knowledge base + # or artifact cache. + self._pre_obj = event.PreprocessObject() + + self._default_codepage = u'cp1252' + self._default_timezone = pytz.timezone('UTC') + + @property + def pre_obj(self): + """The pre-process object.""" + return self._pre_obj + + @property + def codepage(self): + """The codepage.""" + return getattr(self._pre_obj, 'codepage', self._default_codepage) + + @property + def hostname(self): + """The hostname.""" + return getattr(self._pre_obj, 'hostname', u'') + + @property + def platform(self): + """The platform.""" + return getattr(self._pre_obj, 'guessed_os', u'') + + @platform.setter + def platform(self, value): + """The platform.""" + return setattr(self._pre_obj, 'guessed_os', value) + + @property + def timezone(self): + """The timezone object.""" + return getattr(self._pre_obj, 'zone', self._default_timezone) + + @property + def users(self): + """The list of users.""" + return getattr(self._pre_obj, 'users', []) + + @property + def year(self): + """The year.""" + return getattr(self._pre_obj, 'year', 0) + + def GetUsernameByIdentifier(self, identifier): + """Retrieves the username based on an identifier. + + Args: + identifier: the identifier, either a UID or SID. + + Returns: + The username or - if not available. + """ + if not identifier: + return u'-' + + return self._pre_obj.GetUsernameById(identifier) + + def GetValue(self, identifier, default_value=None): + """Retrieves a value by identifier. + + Args: + identifier: the value identifier. + default_value: optional default value. The default is None. + + Returns: + The value or None if not available. + """ + return getattr(self._pre_obj, identifier, default_value) + + def SetDefaultCodepage(self, codepage): + """Sets the default codepage. + + Args: + codepage: the default codepage. + """ + # TODO: check if value is sane. + self._default_codepage = codepage + + def SetDefaultTimezone(self, timezone): + """Sets the default timezone. + + Args: + timezone: the default timezone. + """ + # TODO: check if value is sane. + self._default_timezone = timezone + + def SetValue(self, identifier, value): + """Sets a value by identifier. + + Args: + identifier: the value identifier. + value: the value. + """ + setattr(self._pre_obj, identifier, value) diff --git a/plaso/classifier/__init__.py b/plaso/classifier/__init__.py new file mode 100644 index 0000000..ae78399 --- /dev/null +++ b/plaso/classifier/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
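A brief usage sketch of the knowledge base above (the identifier and values are illustrative): values set during pre-processing are exposed through properties, and the configured defaults apply when the pre-process object carries no value.

from plaso.artifacts import knowledge_base

knowledge_base_object = knowledge_base.KnowledgeBase()

knowledge_base_object.SetValue('hostname', u'WORKSTATION-01')
print knowledge_base_object.hostname  # Prints: WORKSTATION-01

knowledge_base_object.SetDefaultCodepage(u'cp1251')
print knowledge_base_object.codepage  # cp1251, since none was preprocessed.

# GetValue falls back to the supplied default for unset identifiers.
print knowledge_base_object.GetValue('year', default_value=0)  # Prints: 0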
diff --git a/plaso/classifier/classifier.py b/plaso/classifier/classifier.py new file mode 100644 index 0000000..649f31d --- /dev/null +++ b/plaso/classifier/classifier.py @@ -0,0 +1,184 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the format classifier classes. + +Plaso is a tool that extracts events from files on a file system. +For this it either reads files from a mounted file system or from an image. +It uses an exhaustive approach to determine parse events from a file, meaning +that it passes the file first to parser A and if that fails it continues with +parser B. + +The classifier is designed to be able to more quickly determine the format of +a file and limit the number of parsers part of the exhaustive approach. + +The current version of the classifier uses signatures to identify file formats. +Some signatures must always be defined at a specific offset, this is referred to +as an offset-bound signature or bound for short. Other signatures are commonly +found at a specific offset but not necessarily. The last form of signatures is +unbound, meaning that they don't have a fixed or common location where they can +be found. + +A specification is a collection of signatures with additional metadata that +defines a specific file format. These specifications are grouped into a store +for ease of use, e.g. so that they can be read from a configuration file all +at once. + +The classifier requires a scanner to analyze the data in a file. The scanner +uses the specifications in a store to scan for the signatures or a certain +format. + +The classifier allows for multiple methods of scanning a file: +* full: the entire file is scanned. This is the default scanning method. +* head-tail: only the beginning (head) and the end (tail) of the file is + scanned. This approach is more efficient for larger files. + The buffer size is used as the size of the data that is scanned. + Smaller files are scanned entirely. + +The classifier returns zero or more classifications which point to a format +specification and the scan results for the signatures defined by +the specification. +""" + +import logging + + +class Classification(object): + """This class represents a format classification. + + The format classification consists of a format specification and + scan results. + """ + + def __init__(self, specification, scan_matches): + """Initializes the classification. + + Args: + specification: the format specification (instance of Specification). + scan_matches: the list of scan matches (instances of _ScanMatch). + + Raises: + TypeError: if the specification is not of type Specification. 
+ """ + self._specification = specification + self.scan_matches = scan_matches + + @property + def identifier(self): + """The classification type.""" + return self._specification.identifier + + @property + def magic_types(self): + """The magic types or an empty list if none.""" + return self._specification.magic_types + + @property + def mime_types(self): + """The mime type or an empty list if none.""" + return self._specification.mime_types + + +class Classifier(object): + """Class for classifying formats in raw data. + + The classifier is initialized with one or more specifications. + After which it can be used to classify data in files or file-like objects. + + The actual scanning of the data is done by the scanner, these are separate + to allow for the scanner to easily be replaced for a more efficient + alternative if necessary. + + For an example of how the classifier is to be used see: classify.py. + """ + BUFFER_SIZE = 16 * 1024 * 1024 + + def __init__(self, scanner): + """Initializes the classifier and sets up the scanning related structures. + + Args: + scanner: an instance of the signature scanner. + """ + self._scanner = scanner + + def _GetClassifications(self, scan_results): + """Retrieves the classifications based on the scan results. + + Multiple scan results are combined into a single classification. + + Args: + scan_results: a list containing instances of _ScanResult. + + Returns: + a list of instances of Classification. + """ + classifications = {} + + for scan_result in scan_results: + for scan_match in scan_result.scan_matches: + logging.debug( + u'scan match at offset: 0x{0:08x} specification: {1:s}'.format( + scan_match.total_data_offset, scan_result.identifier)) + + if scan_result.identifier not in classifications: + classifications[scan_result.identifier] = Classification( + scan_result.specification, scan_result.scan_matches) + + return classifications.values() + + def ClassifyBuffer(self, data, data_size): + """Classifies the data in a buffer, assumes all necessary data is available. + + Args: + data: a buffer containing raw data. + data_size: the size of the raw data in the buffer. + + Returns: + a list of classifications or an empty list. + """ + scan_state = self._scanner.StartScan() + self._scanner.ScanBuffer(scan_state, data, data_size) + self._scanner.StopScan(scan_state) + + return self._GetClassifications(scan_state.GetResults()) + + def ClassifyFileObject(self, file_object): + """Classifies the data in a file-like object. + + Args: + file_object: a file-like object. + + Returns: + a list of classifier classifications or an empty list. + """ + scan_results = self._scanner.ScanFileObject(file_object) + + return self._GetClassifications(scan_results) + + def ClassifyFile(self, filename): + """Classifies the data in a file. + + Args: + filename: the name of the file. + + Returns: + a list of classifier classifications or an empty list. + """ + classifications = [] + with open(filename, 'rb') as file_object: + classifications = self.ClassifyFileObject(file_object) + return classifications diff --git a/plaso/classifier/classifier_test.py b/plaso/classifier/classifier_test.py new file mode 100644 index 0000000..0575836 --- /dev/null +++ b/plaso/classifier/classifier_test.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains tests for the format classifier classes.""" + +import os +import unittest + +from plaso.classifier import classifier +from plaso.classifier import scanner +from plaso.classifier import test_lib + + +class ClassifierTest(unittest.TestCase): + """Class to test Classifier.""" + + def setUp(self): + """Function to test the initialize function.""" + self._store = test_lib.CreateSpecificationStore() + + self._test_file1 = os.path.join('test_data', 'NTUSER.DAT') + self._test_file2 = os.path.join('test_data', 'syslog.zip') + + def testClassifyFileWithScanner(self): + """Function to test the classify file function.""" + test_scanner = scanner.Scanner(self._store) + + test_classifier = classifier.Classifier(test_scanner) + classifications = test_classifier.ClassifyFile(self._test_file1) + self.assertEqual(len(classifications), 1) + + # TODO: assert the contents of the classification. + + test_classifier = classifier.Classifier(test_scanner) + classifications = test_classifier.ClassifyFile(self._test_file2) + self.assertEqual(len(classifications), 1) + + # TODO: assert the contents of the classification. + + def testClassifyFileWithOffsetBoundScanner(self): + """Function to test the classify file function.""" + test_scanner = scanner.OffsetBoundScanner(self._store) + + test_classifier = classifier.Classifier(test_scanner) + classifications = test_classifier.ClassifyFile(self._test_file1) + self.assertEqual(len(classifications), 1) + + # TODO: assert the contents of the classification. + + test_classifier = classifier.Classifier(test_scanner) + classifications = test_classifier.ClassifyFile(self._test_file2) + self.assertEqual(len(classifications), 1) + + # TODO: assert the contents of the classification. + + +if __name__ == "__main__": + unittest.main() diff --git a/plaso/classifier/classify.py b/plaso/classifier/classify.py new file mode 100644 index 0000000..f9789b9 --- /dev/null +++ b/plaso/classifier/classify.py @@ -0,0 +1,78 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
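The Classifier API above can be exercised in a few lines; the sketch below mirrors the setup of classifier_test.py and assumes the test specification store from test_lib and the NTUSER.DAT test file. The full command line variant follows.

from plaso.classifier import classifier
from plaso.classifier import scanner
from plaso.classifier import test_lib

store = test_lib.CreateSpecificationStore()
test_classifier = classifier.Classifier(scanner.Scanner(store))

for classification in test_classifier.ClassifyFile('test_data/NTUSER.DAT'):
  print u'format: {0:s}'.format(classification.identifier)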
+"""This file contains a small classify test program.""" + +import argparse +import glob +import logging + +from plaso.classifier import classifier +from plaso.classifier import scanner +from plaso.classifier import test_lib + + +def Main(): + args_parser = argparse.ArgumentParser( + description='Classify test program.') + + args_parser.add_argument( + '-t', '--type', type='choice', metavar='TYPE', action='store', + dest='scanner_type', choices=['scan-tree', 'scan_tree'], + default='scan-tree', help='The scanner type') + + args_parser.add_argument( + '-v', '--verbose', action='store_true', dest='verbose', default=False, + help='Print verbose output') + + args_parser.add_argument( + 'filenames', nargs='+', action='store', metavar='FILENAMES', + default=None, help='The input filename(s) to classify.') + + options = args_parser.parse_args() + + if options.verbose: + logging.basicConfig(level=logging.DEBUG) + + files_to_classify = [] + for input_glob in options.filenames: + files_to_classify += glob.glob(input_glob) + + store = test_lib.CreateSpecificationStore() + + if options.scanner_type not in ['scan-tree', 'scan_tree']: + print u'Unsupported scanner type defaulting to: scan-tree' + + scan = scanner.Scanner(store) + classify = classifier.Classifier(scan) + + for input_filename in files_to_classify: + classifications = classify.ClassifyFile(input_filename) + + print u'File: {0:s}'.format(input_filename) + if not classifications: + print u'No classifications found.' + else: + print u'Classifications:' + for classification in classifications: + print u'\tformat: {0:s}'.format(classification.identifier) + + print u'' + + +if __name__ == '__main__': + Main() diff --git a/plaso/classifier/patterns.py b/plaso/classifier/patterns.py new file mode 100644 index 0000000..63f0aca --- /dev/null +++ b/plaso/classifier/patterns.py @@ -0,0 +1,308 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The patterns classes used by the scan tree-based format scanner.""" + + +class _ByteValuePatterns(object): + """Class that implements a mapping between byte value and patterns. + + The byte value patterns are used in the scan tree-based format scanner + to map a byte value to one or more patterns. + """ + + def __init__(self, byte_value): + """Initializes the pattern table (entry) byte value. + + Args: + byte_value: the byte value that maps the patterns in the table. + """ + super(_ByteValuePatterns, self).__init__() + self.byte_value = byte_value + self.patterns = {} + + def __unicode__(self): + """Retrieves a string representation of the byte value patterns.""" + return u'0x{0:02x} {1!s}'.format(ord(self.byte_value), self.patterns) + + def AddPattern(self, pattern): + """Adds a pattern. + + Args: + pattern: the pattern (instance of Pattern). + + Raises: + ValueError: if the table entry already contains a pattern + with the same identifier. 
+ """ + if pattern.identifier in self.patterns: + raise ValueError(u'Pattern {0:s} is already defined.'.format( + pattern.identifier)) + + self.patterns[pattern.identifier] = pattern + + def ToDebugString(self, indentation_level=1): + """Converts the byte value pattern into a debug string.""" + indentation = u' ' * indentation_level + + header = u'{0:s}byte value: 0x{1:02x}\n'.format( + indentation, ord(self.byte_value)) + + entries = u''.join([u'{0:s} patterns: {1:s}\n'.format( + indentation, identifier) for identifier in self.patterns]) + + return u''.join([header, entries, u'\n']) + + +class _SkipTable(object): + """Class that implements a skip table. + + The skip table is used in the scan tree-based format scanner to determine + the skip value for the Boyer–Moore–Horspool search. + """ + + def __init__(self, skip_pattern_length): + """Initializes the skip table. + + Args: + skip_pattern_length: the (maximum) skip pattern length. + """ + super(_SkipTable, self).__init__() + self._skip_value_per_byte_value = {} + self.skip_pattern_length = skip_pattern_length + + def __getitem__(self, key): + """Retrieves a specific skip value. + + Args: + key: the byte value within the skip table. + + Returns: + the skip value for the key or the maximim skip value + if no corresponding key was found. + """ + if key in self._skip_value_per_byte_value: + return self._skip_value_per_byte_value[key] + return self.skip_pattern_length + + def SetSkipValue(self, byte_value, skip_value): + """Sets a skip value. + + Args: + byte_value: the corresponding byte value. + skip_value: the number of bytes to skip. + + Raises: + ValueError: if byte value or skip value is out of bounds. + """ + if byte_value < 0 or byte_value > 255: + raise ValueError(u'Invalid byte value, value out of bounds.') + + if skip_value < 0 or skip_value >= self.skip_pattern_length: + raise ValueError(u'Invalid skip value, value out of bounds.') + + if (not byte_value in self._skip_value_per_byte_value or + self._skip_value_per_byte_value[byte_value] > skip_value): + self._skip_value_per_byte_value[byte_value] = skip_value + + def ToDebugString(self): + """Converts the skip table into a debug string.""" + header = u'Byte value\tSkip value\n' + + entries = u''.join([u'0x{0:02x}\t{1:d}\n'.format( + byte_value, self._skip_value_per_byte_value[byte_value]) + for byte_value in self._skip_value_per_byte_value]) + + default = u'Default\t{0:d}\n'.format(self.skip_pattern_length) + + return u''.join([header, entries, default, u'\n']) + + +class Pattern(object): + """Class that implements a pattern.""" + + def __init__(self, signature_index, signature, specification): + """Initializes the pattern. + + Args: + signature_index: the index of the signature within the specification. + signature: the signature (instance of Signature). + specification: the specification (instance of Specification) that + contains the signature. + """ + super(Pattern, self).__init__() + self._signature_index = signature_index + self.signature = signature + self.specification = specification + + def __unicode__(self): + """Retrieves a string representation.""" + return self.identifier + + @property + def expression(self): + """The signature expression.""" + return self.signature.expression + + @property + def identifier(self): + """The identifier.""" + # Using _ here because some scanner implementation are limited to what + # characters can be used in the identifiers. 
+    return u'{0:s}_{1:d}'.format(
+        self.specification.identifier, self._signature_index)
+
+  @property
+  def offset(self):
+    """The signature offset."""
+    return self.signature.offset
+
+  @property
+  def is_bound(self):
+    """Boolean value to indicate the signature is bound to an offset."""
+    return self.signature.is_bound
+
+
+class PatternTable(object):
+  """Class that implements a pattern table.
+
+  The pattern table is used in the scan tree-based format scanner
+  to construct a scan tree. It contains either unbound patterns or
+  patterns bound to a specific offset.
+  """
+
+  def __init__(self, patterns, ignore_list, is_bound=None):
+    """Initializes and builds the patterns table from patterns.
+
+    Args:
+      patterns: a list of the patterns.
+      ignore_list: a list of pattern offsets to ignore.
+      is_bound: optional boolean value to indicate if the signatures are bound
+          to offsets. The default is None, which means the value should
+          be ignored and both bound and unbound patterns are considered
+          unbound.
+
+    Raises:
+      ValueError: if a signature pattern is too small to be useful (< 4).
+    """
+    super(PatternTable, self).__init__()
+    self._byte_values_per_offset = {}
+    self.largest_pattern_length = 0
+    self.largest_pattern_offset = 0
+    self.patterns = []
+    self.smallest_pattern_length = 0
+    self.smallest_pattern_offset = 0
+
+    for pattern in patterns:
+      if is_bound is not None and pattern.signature.is_bound != is_bound:
+        continue
+
+      pattern_length = len(pattern.expression)
+
+      if pattern_length < 4:
+        raise ValueError(u'Pattern too small to be useful.')
+
+      self.smallest_pattern_length = min(
+          self.smallest_pattern_length, pattern_length)
+      self.largest_pattern_length = max(
+          self.largest_pattern_length, pattern_length)
+
+      self.patterns.append(pattern)
+
+      self._AddPattern(pattern, ignore_list, is_bound)
+
+  def _AddPattern(self, pattern, ignore_list, is_bound):
+    """Adds the byte values per offset in the pattern to the table.
+
+    Args:
+      pattern: the pattern (instance of Pattern).
+      ignore_list: a list of pattern offsets to ignore.
+      is_bound: boolean value to indicate if the signatures are bound
+          to offsets. A value of None indicates that the value should
+          be ignored and both bound and unbound patterns are considered
+          unbound.
+    """
+    pattern_offset = pattern.offset if is_bound else 0
+
+    self.smallest_pattern_offset = min(
+        self.smallest_pattern_offset, pattern_offset)
+    self.largest_pattern_offset = max(
+        self.largest_pattern_offset, pattern_offset)
+
+    for byte_value in pattern.expression:
+      if pattern_offset not in self._byte_values_per_offset:
+        self._byte_values_per_offset[pattern_offset] = {}
+
+      if pattern_offset not in ignore_list:
+        byte_values = self._byte_values_per_offset[pattern_offset]
+
+        if byte_value not in byte_values:
+          byte_values[byte_value] = _ByteValuePatterns(byte_value)
+
+        byte_value_patterns = byte_values[byte_value]
+
+        byte_value_patterns.AddPattern(pattern)
+
+      pattern_offset += 1
+
+  @property
+  def offsets(self):
+    """The offsets."""
+    return self._byte_values_per_offset.keys()
+
+  def GetByteValues(self, pattern_offset):
+    """Returns the byte values for a specific pattern offset."""
+    return self._byte_values_per_offset[pattern_offset]
+
+  def GetSkipTable(self):
+    """Retrieves the skip table for the patterns in the table.
+
+    Returns:
+      The skip table (instance of _SkipTable).
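+
+    For example, assuming a single 4-byte pattern 'regf' and a smallest
+    pattern length of 4, the resulting skip values are: 'r': 3, 'e': 2,
+    'g': 1 and 'f': 0, with a default skip value of 4 for all other
+    byte values.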
+ """ + skip_table = _SkipTable(self.smallest_pattern_length) + + for pattern in self.patterns: + if pattern.expression: + skip_value = self.smallest_pattern_length + + for expression_index in range(0, self.smallest_pattern_length): + skip_value -= 1 + skip_table.SetSkipValue( + ord(pattern.expression[expression_index]), skip_value) + + return skip_table + + def ToDebugString(self): + """Converts the pattern table into a debug string.""" + header = u'Pattern offset\tByte value(s)\n' + entries = u'' + + for pattern_offset in self._byte_values_per_offset: + entries += u'{0:d}'.format(pattern_offset) + + byte_values = self._byte_values_per_offset[pattern_offset] + + for byte_value in byte_values: + identifiers = u', '.join( + [identifier for identifier in byte_values[byte_value].patterns]) + + entries += u'\t0x{0:02x} ({1:s})'.format(ord(byte_value), identifiers) + + entries += u'\n' + + return u''.join([header, entries, u'\n']) diff --git a/plaso/classifier/range_list.py b/plaso/classifier/range_list.py new file mode 100644 index 0000000..2545ca7 --- /dev/null +++ b/plaso/classifier/range_list.py @@ -0,0 +1,156 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The range list data type.""" + + +class Range(object): + """Class that implements a range object.""" + + def __init__(self, range_offset, range_size): + """Initializes the range object. + + Args: + range_offset: the range offset. + range_size: the range size. + + Raises: + ValueError: if the range offset or range size is not valid. + """ + if range_offset < 0: + raise ValueError(u'Invalid range offset value.') + + if range_size < 0: + raise ValueError(u'Invalid range size value.') + + super(Range, self).__init__() + self.start_offset = range_offset + self.size = range_size + self.end_offset = range_offset + range_size + + +class RangeList(object): + """Class that implements a range list object.""" + + def __init__(self): + """Initializes the range list object.""" + super(RangeList, self).__init__() + self.ranges = [] + + @property + def number_of_ranges(self): + """The number of ranges.""" + return len(self.ranges) + + def GetSpanningRange(self): + """Retrieves the range spanning the entire range list.""" + if self.number_of_ranges == 0: + return + + first_range = self.ranges[0] + last_range = self.ranges[-1] + range_size = last_range.end_offset - first_range.start_offset + + return Range(first_range.start_offset, range_size) + + def Insert(self, range_offset, range_size): + """Inserts the range defined by the offset and size in the list. + + Note that overlapping ranges will be merged. + + Args: + range_offset: the range offset. + range_size: the range size. + + Raises: + RuntimeError: if the range cannot be inserted. + ValueError: if the range offset or range size is not valid. 
+ """ + if range_offset < 0: + raise ValueError(u'Invalid range offset value.') + + if range_size < 0: + raise ValueError(u'Invalid range size value.') + + insert_index = None + merge_index = None + + number_of_range_objects = len(self.ranges) + + range_end_offset = range_offset + range_size + + if number_of_range_objects == 0: + insert_index = 0 + + else: + range_object_index = 0 + + for range_object in self.ranges: + # Ignore negative ranges. + if range_object.start_offset < 0: + range_object_index += 1 + continue + + # Insert the range before an existing one. + if range_end_offset < range_object.start_offset: + insert_index = range_object_index + break + + # Ignore the range since the existing one overlaps it. + if (range_offset >= range_object.start_offset and + range_end_offset <= range_object.end_offset): + break + + # Merge the range since it overlaps the existing one at the end. + if (range_offset >= range_object.start_offset and + range_offset <= range_object.end_offset): + merge_index = range_object_index + break + + # Merge the range since it overlaps the existing one at the start. + if (range_end_offset >= range_object.start_offset and + range_end_offset <= range_object.end_offset): + merge_index = range_object_index + break + + # Merge the range since it overlaps the existing one. + if (range_offset <= range_object.start_offset and + range_end_offset >= range_object.end_offset): + merge_index = range_object_index + break + + range_object_index += 1 + + # Insert the range after the last one. + if range_object_index >= number_of_range_objects: + insert_index = number_of_range_objects + + if insert_index is not None and merge_index is not None: + raise RuntimeError( + u'Unable to insert the range both insert and merge specified.') + + if insert_index is not None: + self.ranges.insert(insert_index, Range(range_offset, range_size)) + + elif merge_index is not None: + range_object = self.ranges[merge_index] + if range_offset < range_object.start_offset: + range_object.size += range_object.start_offset - range_offset + range_object.start_offset = range_offset + if range_end_offset > range_object.end_offset: + range_object.size += range_end_offset - range_object.end_offset + range_object.end_offset = range_end_offset diff --git a/plaso/classifier/range_list_test.py b/plaso/classifier/range_list_test.py new file mode 100644 index 0000000..2e77a36 --- /dev/null +++ b/plaso/classifier/range_list_test.py @@ -0,0 +1,113 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the range list.""" + +import unittest + +from plaso.classifier import range_list + + +class RangeListTest(unittest.TestCase): + """Class to test the range list.""" + + def testInsertPositiveRanges(self): + """Function to test the insert function using positive ranges.""" + range_list_object = range_list.RangeList() + + # Test non-overlapping range. 
+ range_list_object.Insert(500, 100) + self.assertEquals(range_list_object.number_of_ranges, 1) + + range_object = range_list_object.ranges[0] + self.assertEquals(range_object.start_offset, 500) + self.assertEquals(range_object.end_offset, 600) + self.assertEquals(range_object.size, 100) + + # Test non-overlapping range. + range_list_object.Insert(2000, 100) + self.assertEquals(range_list_object.number_of_ranges, 2) + + range_object = range_list_object.ranges[1] + self.assertEquals(range_object.start_offset, 2000) + self.assertEquals(range_object.end_offset, 2100) + self.assertEquals(range_object.size, 100) + + # Test range that overlaps with an existing range at the start. + range_list_object.Insert(1950, 100) + self.assertEquals(range_list_object.number_of_ranges, 2) + + range_object = range_list_object.ranges[1] + self.assertEquals(range_object.start_offset, 1950) + self.assertEquals(range_object.end_offset, 2100) + self.assertEquals(range_object.size, 150) + + # Test range that overlaps with an existing range at the end. + range_list_object.Insert(2050, 100) + self.assertEquals(range_list_object.number_of_ranges, 2) + + range_object = range_list_object.ranges[1] + self.assertEquals(range_object.start_offset, 1950) + self.assertEquals(range_object.end_offset, 2150) + self.assertEquals(range_object.size, 200) + + # Test non-overlapping range. + range_list_object.Insert(1000, 100) + self.assertEquals(range_list_object.number_of_ranges, 3) + + range_object = range_list_object.ranges[1] + self.assertEquals(range_object.start_offset, 1000) + self.assertEquals(range_object.end_offset, 1100) + self.assertEquals(range_object.size, 100) + + # Test range that aligns with an existing range at the end. + range_list_object.Insert(1100, 100) + self.assertEquals(range_list_object.number_of_ranges, 3) + + range_object = range_list_object.ranges[1] + self.assertEquals(range_object.start_offset, 1000) + self.assertEquals(range_object.end_offset, 1200) + self.assertEquals(range_object.size, 200) + + # Test range that aligns with an existing range at the start. + range_list_object.Insert(900, 100) + self.assertEquals(range_list_object.number_of_ranges, 3) + + range_object = range_list_object.ranges[1] + self.assertEquals(range_object.start_offset, 900) + self.assertEquals(range_object.end_offset, 1200) + self.assertEquals(range_object.size, 300) + + # Test non-overlapping range. + range_list_object.Insert(0, 100) + self.assertEquals(range_list_object.number_of_ranges, 4) + + range_object = range_list_object.ranges[0] + self.assertEquals(range_object.start_offset, 0) + self.assertEquals(range_object.end_offset, 100) + self.assertEquals(range_object.size, 100) + + # Test invalid ranges. + with self.assertRaises(ValueError): + range_list_object.Insert(-1, 100) + + with self.assertRaises(ValueError): + range_list_object.Insert(3000, -100) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/classifier/scan_tree.py b/plaso/classifier/scan_tree.py new file mode 100644 index 0000000..c7b8039 --- /dev/null +++ b/plaso/classifier/scan_tree.py @@ -0,0 +1,744 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The scan tree classes used by the scan tree-based format scanner."""
+
+import logging
+
+from plaso.classifier import patterns
+from plaso.classifier import range_list
+
+
+class _PatternWeights(object):
+  """Class that implements pattern weights."""
+
+  def __init__(self):
+    """Initializes the pattern weights."""
+    super(_PatternWeights, self).__init__()
+    self._offsets_per_weight = {}
+    self._weight_per_offset = {}
+
+  def AddOffset(self, pattern_offset):
+    """Adds a pattern offset and sets its weight to 0.
+
+    Args:
+      pattern_offset: the pattern offset to add to the pattern weights.
+
+    Raises:
+      ValueError: if the pattern weights already contains the pattern offset.
+    """
+    if pattern_offset in self._weight_per_offset:
+      raise ValueError(u'Pattern offset already set.')
+
+    self._weight_per_offset[pattern_offset] = 0
+
+  def AddWeight(self, pattern_offset, weight):
+    """Adds a weight for a specific pattern offset.
+
+    Args:
+      pattern_offset: the pattern offset to add to the pattern weights.
+      weight: the corresponding weight to add.
+
+    Raises:
+      ValueError: if the pattern weights does not contain the pattern offset.
+    """
+    if pattern_offset not in self._weight_per_offset:
+      raise ValueError(u'Pattern offset not set.')
+
+    # Remove the pattern offset from the bucket of its previous total
+    # weight so that the offsets per weight mapping remains consistent.
+    previous_weight = self._weight_per_offset[pattern_offset]
+    if previous_weight in self._offsets_per_weight:
+      if pattern_offset in self._offsets_per_weight[previous_weight]:
+        self._offsets_per_weight[previous_weight].remove(pattern_offset)
+        if not self._offsets_per_weight[previous_weight]:
+          del self._offsets_per_weight[previous_weight]
+
+    self._weight_per_offset[pattern_offset] += weight
+    total_weight = self._weight_per_offset[pattern_offset]
+
+    if total_weight not in self._offsets_per_weight:
+      self._offsets_per_weight[total_weight] = []
+
+    self._offsets_per_weight[total_weight].append(pattern_offset)
+
+  def GetLargestWeight(self):
+    """Retrieves the largest weight or 0 if none."""
+    if self._offsets_per_weight:
+      return max(self._offsets_per_weight)
+
+    return 0
+
+  def GetOffsetsForWeight(self, weight):
+    """Retrieves the list of offsets for a specific weight."""
+    return self._offsets_per_weight[weight]
+
+  def GetWeightForOffset(self, pattern_offset):
+    """Retrieves the weight for a specific pattern offset."""
+    return self._weight_per_offset[pattern_offset]
+
+  def ToDebugString(self):
+    """Converts the pattern weights into a debug string."""
+    header1 = u'Pattern offset\tWeight\n'
+
+    entries1 = u''.join([u'{0:d}\t{1:d}\n'.format(
+        pattern_offset, self._weight_per_offset[pattern_offset])
+        for pattern_offset in self._weight_per_offset])
+
+    header2 = u'Weight\tPattern offset(s)\n'
+
+    entries2 = u''.join([u'{0:d}\t{1!s}\n'.format(
+        weight, self._offsets_per_weight[weight])
+        for weight in self._offsets_per_weight])
+
+    return u''.join([header1, entries1, u'\n', header2, entries2, u'\n'])
+
+  def SetWeight(self, pattern_offset, weight):
+    """Sets a weight for a specific pattern offset.
+
+    Args:
+      pattern_offset: the pattern offset to set in the pattern weights.
+      weight: the corresponding weight to set.
+
+    Raises:
+      ValueError: if the pattern weights does not contain the pattern offset.
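+
+    Unlike AddWeight, which adds to the stored weight, this function
+    overwrites any previously stored weight for the pattern offset.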
+ """ + if pattern_offset not in self._weight_per_offset: + raise ValueError(u'Pattern offset not set.') + + self._weight_per_offset[pattern_offset] = weight + + if weight not in self._offsets_per_weight: + self._offsets_per_weight[weight] = [] + + self._offsets_per_weight[weight].append(pattern_offset) + + +class ScanTree(object): + """Class that implements a scan tree.""" + + _COMMON_BYTE_VALUES = frozenset( + '\x00\x01\xff\t\n\r 0123456789' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz') + + # The offset must be positive, negative offsets are ignored. + OFFSET_MODE_POSITIVE = 1 + # The offset must be negative, positive offsets are ignored. + OFFSET_MODE_NEGATIVE = 2 + # The offset must be positive, an error is raised for negative offsets. + OFFSET_MODE_POSITIVE_STRICT = 3 + # The offset must be negative, an error is raised for positive offsets. + OFFSET_MODE_NEGATIVE_STRICT = 4 + + def __init__( + self, specification_store, is_bound, + offset_mode=OFFSET_MODE_POSITIVE_STRICT): + """Initializes and builds the scan tree. + + Args: + specification_store: the specification store (instance of + SpecificationStore) that contains the format + specifications. + is_bound: boolean value to indicate if the signatures are bound + to offsets. A value of None indicates that the value should + be ignored and both bound and unbound patterns are considered + unbound. + offset_mode: optional value to indicate how the signature offsets should + be handled. The default is that the offset must be positive + and an error is raised for negative offsets. + """ + super(ScanTree, self).__init__() + self.largest_length = 0 + self.pattern_list = [] + self.range_list = range_list.RangeList() + self.root_node = None + self.skip_table = None + + # First determine all the patterns from the specification store. + self._BuildPatterns(specification_store, is_bound, offset_mode=offset_mode) + + # Next create the scan tree starting with the root node. + ignore_list = [] + pattern_table = patterns.PatternTable( + self.pattern_list, ignore_list, is_bound) + + if pattern_table.patterns: + self.root_node = self._BuildScanTreeNode( + pattern_table, ignore_list, is_bound) + + logging.debug(u'Scan tree:\n{0:s}'.format( + self.root_node.ToDebugString())) + + # At the end the skip table is determined to provide for the + # Boyer–Moore–Horspool skip value. + self.skip_table = pattern_table.GetSkipTable() + + logging.debug(u'Skip table:\n{0:s}'.format( + self.skip_table.ToDebugString())) + + self.largest_length = pattern_table.largest_pattern_length + + def _BuildPatterns( + self, specification_store, is_bound, + offset_mode=OFFSET_MODE_POSITIVE_STRICT): + """Builds the list of patterns. + + Args: + specification_store: the specification store (instance of + SpecificationStore) that contains the format + specifications. + is_bound: boolean value to indicate if the signatures are bound + to offsets. A value of None indicates that the value should + be ignored and both bound and unbound patterns are considered + unbound. + offset_mode: optional value to indicate how the signature offsets should + be handled. The default is that the offset must be positive + and an error is raised for negative offsets. + + Raises: + ValueError: if a signature offset invalid according to specified offset + mode or a signature pattern is too small to be useful (< 4). 
+ """ + self.pattern_list = [] + + for specification in specification_store.specifications: + signature_index = 0 + + for signature in specification.signatures: + if signature.expression: + signature_offset = signature.offset if is_bound else 0 + signature_pattern_length = len(signature.expression) + + # Make sure signature offset is numeric. + try: + signature_offset = int(signature_offset) + except (TypeError, ValueError): + signature_offset = 0 + + if signature_offset < 0: + if offset_mode == self.OFFSET_MODE_POSITIVE: + continue + elif offset_mode == self.OFFSET_MODE_POSITIVE_STRICT: + raise ValueError(u'Signature offset less than 0.') + + # The range list does not allow offsets to be negative and thus + # the signature offset is turned into a positive equivalent. + signature_offset *= -1 + + # The signature size is substracted to make sure the spanning + # range will align with the original negative offset values. + signature_offset -= signature_pattern_length + + elif signature_offset > 0: + if offset_mode == self.OFFSET_MODE_NEGATIVE: + continue + elif offset_mode == self.OFFSET_MODE_NEGATIVE_STRICT: + raise ValueError(u'Signature offset greater than 0.') + + if signature_pattern_length < 4: + raise ValueError(u'Signature pattern smaller than 4.') + + pattern = patterns.Pattern( + signature_index, signature, specification) + self.pattern_list.append(pattern) + self.range_list.Insert(signature_offset, signature_pattern_length) + + signature_index += 1 + + def _BuildScanTreeNode(self, pattern_table, ignore_list, is_bound): + """Builds a scan tree node. + + Args: + pattern_table: a pattern table (instance of PatternTable). + ignore_list: a list of pattern offsets to ignore + is_bound: boolean value to indicate if the signatures are bound + to offsets. A value of None indicates that the value should + be ignored and both bound and unbound patterns are considered + unbound. + + Raises: + ValueError: if number of byte value patterns value out of bounds. + + Returns: + A scan tree node (instance of ScanTreeNode). + """ + # Make a copy of the lists because the function is going to alter them + # and the changes must remain in scope of the function. 
+    # Make a copy of the lists because the function is going to alter them
+    # and the changes must remain in scope of the function.
+    pattern_list = list(pattern_table.patterns)
+    ignore_list = list(ignore_list)
+
+    similarity_weights = _PatternWeights()
+    occurrence_weights = _PatternWeights()
+    value_weights = _PatternWeights()
+
+    for pattern_offset in pattern_table.offsets:
+      similarity_weights.AddOffset(pattern_offset)
+      occurrence_weights.AddOffset(pattern_offset)
+      value_weights.AddOffset(pattern_offset)
+
+      byte_values = pattern_table.GetByteValues(pattern_offset)
+      number_of_byte_values = len(byte_values)
+
+      if number_of_byte_values > 1:
+        occurrence_weights.SetWeight(pattern_offset, number_of_byte_values)
+
+      for byte_value in byte_values:
+        byte_value_patterns = byte_values[byte_value]
+        byte_value_weight = len(byte_value_patterns.patterns)
+
+        if byte_value_weight > 1:
+          similarity_weights.AddWeight(pattern_offset, byte_value_weight)
+
+        # Byte values that are not common are better suited to distinguish
+        # between patterns.
+        if byte_value not in self._COMMON_BYTE_VALUES:
+          value_weights.AddWeight(pattern_offset, 1)
+
+    logging.debug(u'Pattern table:\n{0:s}'.format(
+        pattern_table.ToDebugString()))
+    logging.debug(u'Similarity weights:\n{0:s}'.format(
+        similarity_weights.ToDebugString()))
+    logging.debug(u'Occurrence weights:\n{0:s}'.format(
+        occurrence_weights.ToDebugString()))
+    logging.debug(u'Value weights:\n{0:s}'.format(
+        value_weights.ToDebugString()))
+
+    pattern_offset = self._GetMostSignificantPatternOffset(
+        pattern_list, similarity_weights, occurrence_weights, value_weights)
+
+    ignore_list.append(pattern_offset)
+
+    # For the scan tree negative offsets are adjusted so that
+    # the smallest pattern offset is 0.
+    scan_tree_pattern_offset = pattern_offset
+    if scan_tree_pattern_offset < 0:
+      scan_tree_pattern_offset -= pattern_table.smallest_pattern_offset
+
+    scan_tree_node = ScanTreeNode(scan_tree_pattern_offset)
+
+    byte_values = pattern_table.GetByteValues(pattern_offset)
+
+    for byte_value in byte_values:
+      byte_value_patterns = byte_values[byte_value]
+
+      logging.debug(u'{0:s}'.format(byte_value_patterns.ToDebugString()))
+
+      number_of_byte_value_patterns = len(byte_value_patterns.patterns)
+
+      if number_of_byte_value_patterns <= 0:
+        raise ValueError(
+            u'Invalid number of byte value patterns, value out of bounds.')
+
+      elif number_of_byte_value_patterns == 1:
+        for identifier in byte_value_patterns.patterns:
+          logging.debug(
+              u'Adding pattern: {0:s} for byte value: 0x{1:02x}.'.format(
+                  identifier, ord(byte_value)))
+
+          scan_tree_node.AddByteValue(
+              byte_value, byte_value_patterns.patterns[identifier])
+
+      else:
+        pattern_table = patterns.PatternTable(
+            byte_value_patterns.patterns.itervalues(), ignore_list, is_bound)
+
+        scan_sub_node = self._BuildScanTreeNode(
+            pattern_table, ignore_list, is_bound)
+
+        logging.debug(
+            u'Adding scan node for byte value: 0x{0:02x}\n{1:s}'.format(
+                ord(byte_value), scan_sub_node.ToDebugString()))
+
+        scan_tree_node.AddByteValue(ord(byte_value), scan_sub_node)
+
+      for identifier in byte_value_patterns.patterns:
+        logging.debug(u'Removing pattern: {0:s} from:\n{1:s}'.format(
+            identifier, self._PatternsToDebugString(pattern_list)))
+
+        pattern_list.remove(byte_value_patterns.patterns[identifier])
+
+    logging.debug(u'Remaining patterns:\n{0:s}'.format(
+        self._PatternsToDebugString(pattern_list)))
+
+    number_of_patterns = len(pattern_list)
+
+    if number_of_patterns == 1:
+      logging.debug(u'Setting pattern: {0:s} for default value'.format(
+          pattern_list[0].identifier))
+
+      scan_tree_node.SetDefaultValue(pattern_list[0])
+
+    elif number_of_patterns > 1:
+      pattern_table = patterns.PatternTable(pattern_list, ignore_list, is_bound)
+
+      scan_sub_node = self._BuildScanTreeNode(
+          pattern_table, ignore_list, is_bound)
+
+      logging.debug(u'Setting scan node for default value:\n{0:s}'.format(
+          scan_sub_node.ToDebugString()))
+
+      scan_tree_node.SetDefaultValue(scan_sub_node)
+
+    return scan_tree_node
+
+  def _GetMostSignificantPatternOffset(
+      self, pattern_list, similarity_weights, occurrence_weights,
+      value_weights):
+    """Returns the most significant pattern offset.
+
+    Args:
+      pattern_list: a list of patterns.
+      similarity_weights: the similarity (pattern) weights.
+      occurrence_weights: the occurrence (pattern) weights.
+      value_weights: the value (pattern) weights.
+
+    Raises:
+      ValueError: when pattern_list is an empty list.
+
+    Returns:
+      a pattern offset.
+    """
+    if not pattern_list:
+      raise ValueError(u'Missing pattern list.')
+
+    pattern_offset = None
+    number_of_patterns = len(pattern_list)
+
+    if number_of_patterns == 1:
+      pattern_offset = self._GetPatternOffsetForValueWeights(
+          value_weights)
+
+    elif number_of_patterns == 2:
+      pattern_offset = self._GetPatternOffsetForOccurrenceWeights(
+          occurrence_weights, value_weights)
+
+    elif number_of_patterns > 2:
+      pattern_offset = self._GetPatternOffsetForSimilarityWeights(
+          similarity_weights, occurrence_weights, value_weights)
+
+    logging.debug(u'Largest weight offset: {0:d}'.format(pattern_offset))
+
+    return pattern_offset
+
+  def _GetPatternOffsetForOccurrenceWeights(
+      self, occurrence_weights, value_weights):
+    """Returns the most significant offset based on the occurrence weights.
+
+    Args:
+      occurrence_weights: the occurrence (pattern) weights.
+      value_weights: the value (pattern) weights.
+
+    Returns:
+      a pattern offset.
+    """
+    debug_string = u''
+    pattern_offset = None
+
+    largest_weight = occurrence_weights.GetLargestWeight()
+    logging.debug(u'Largest occurrence weight: {0:d}'.format(largest_weight))
+
+    if largest_weight > 0:
+      occurrence_weight_offsets = occurrence_weights.GetOffsetsForWeight(
+          largest_weight)
+      number_of_occurrence_offsets = len(occurrence_weight_offsets)
+    else:
+      number_of_occurrence_offsets = 0
+
+    if number_of_occurrence_offsets == 0:
+      pattern_offset = self._GetPatternOffsetForValueWeights(
+          value_weights)
+
+    elif number_of_occurrence_offsets == 1:
+      pattern_offset = occurrence_weight_offsets[0]
+
+    else:
+      largest_weight = 0
+
+      for occurrence_offset in occurrence_weight_offsets:
+        value_weight = value_weights.GetWeightForOffset(
+            occurrence_offset)
+
+        debug_string = (
+            u'Occurrence offset: {0:d} value weight: {1:d}').format(
+                occurrence_offset, value_weight)
+
+        if pattern_offset is None or largest_weight < value_weight:
+          largest_weight = value_weight
+          pattern_offset = occurrence_offset
+
+        debug_string += u' largest value weight: {0:d}'.format(
+            largest_weight)
+
+        logging.debug(u'{0:s}'.format(debug_string))
+
+    return pattern_offset
+
+  def _GetPatternOffsetForSimilarityWeights(
+      self, similarity_weights, occurrence_weights, value_weights):
+    """Returns the most significant pattern offset.
+
+    Args:
+      similarity_weights: the similarity (pattern) weights.
+      occurrence_weights: the occurrence (pattern) weights.
+      value_weights: the value (pattern) weights.
+
+    Returns:
+      a pattern offset.
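+
+    When no similarity weight larger than 0 is available this function
+    falls back to the occurrence weights.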
+ """ + debug_string = "" + pattern_offset = None + + largest_weight = similarity_weights.GetLargestWeight() + logging.debug(u'Largest similarity weight: {0:d}'.format(largest_weight)) + + if largest_weight > 0: + similarity_weight_offsets = similarity_weights.GetOffsetsForWeight( + largest_weight) + number_of_similarity_offsets = len(similarity_weight_offsets) + else: + number_of_similarity_offsets = 0 + + if number_of_similarity_offsets == 0: + pattern_offset = self._GetPatternOffsetForOccurrenceWeights( + occurrence_weights, value_weights) + + elif number_of_similarity_offsets == 1: + pattern_offset = similarity_weight_offsets[0] + + else: + largest_weight = 0 + largest_value_weight = 0 + + for similarity_offset in similarity_weight_offsets: + occurrence_weight = occurrence_weights.GetWeightForOffset( + similarity_offset) + + debug_string = ( + u'Similarity offset: {0:d} occurrence weight: {1:d}').format( + similarity_offset, occurrence_weight) + + if largest_weight > 0 and largest_weight == occurrence_weight: + value_weight = value_weights.GetWeightForOffset( + similarity_offset) + + debug_string += u' value weight: {0:d}'.format(value_weight) + + if largest_value_weight < value_weight: + largest_weight = 0 + + if not pattern_offset or largest_weight < occurrence_weight: + largest_weight = occurrence_weight + pattern_offset = similarity_offset + + largest_value_weight = value_weights.GetWeightForOffset( + similarity_offset) + + debug_string += u' largest value weight: {0:d}'.format( + largest_value_weight) + + logging.debug(u'{0:s}'.format(debug_string)) + + return pattern_offset + + def _GetPatternOffsetForValueWeights( + self, value_weights): + """Returns the most significant pattern offset based on the value weights. + + Args: + value_weights: the value (pattern) weights. + + Raises: + RuntimeError: no value weight offset were found. + + Returns: + a pattern offset. + """ + largest_weight = value_weights.GetLargestWeight() + logging.debug(u'Largest value weight: {0:d}'.format(largest_weight)) + + if largest_weight > 0: + value_weight_offsets = value_weights.GetOffsetsForWeight(largest_weight) + number_of_value_offsets = len(value_weight_offsets) + else: + number_of_value_offsets = 0 + + if number_of_value_offsets == 0: + raise RuntimeError(u'No value weight offsets found.') + + return value_weight_offsets[0] + + def _PatternsToDebugString(self, pattern_list): + """Converts the list of patterns into a debug string.""" + entries = u', '.join([u'{0:s}'.format(pattern) for pattern in pattern_list]) + + return u''.join([u'[', entries, u']']) + + +class ScanTreeNode(object): + """Class that implements a scan tree node.""" + + def __init__(self, pattern_offset): + """Initializes the scan tree node. + + Args: + pattern_offset: the offset in the pattern to which the node + applies. + """ + super(ScanTreeNode, self).__init__() + self._byte_values = {} + self.default_value = None + self.parent = None + self.pattern_offset = pattern_offset + + def AddByteValue(self, byte_value, scan_object): + """Adds a byte value. + + Args: + byte_value: the corresponding byte value. + scan_object: the scan object, either a scan sub node or a pattern. + + Raises: + ValueError: if byte value is out of bounds or if the node already + contains a scan object for the byte value. 
+ """ + if isinstance(byte_value, str): + byte_value = ord(byte_value) + + if byte_value < 0 or byte_value > 255: + raise ValueError(u'Invalid byte value, value out of bounds.') + + if byte_value in self._byte_values: + raise ValueError(u'Byte value already set.') + + if isinstance(scan_object, ScanTreeNode): + scan_object.parent = self + + self._byte_values[byte_value] = scan_object + + def CompareByteValue( + self, data, data_offset, data_size, total_data_offset, + total_data_size=None): + """Scans a buffer using the bounded scan tree. + + This function will return partial matches on the ata block block + boundary as long as the total data size has not been reached. + + Args: + data: a buffer containing raw data. + data_offset: the offset in the raw data in the buffer. + data_size: the size of the raw data in the buffer. + total_data_offset: the offset of the data relative to the start of + the total data scanned. + total_data_size: optional value to indicate the total data size. + The default is None. + + Returns: + the resulting scan object which is either a ScanTreeNode or Pattern + or None. + + Raises: + RuntimeError: if the data offset, total data offset, total data size + or pattern offset value is out of bounds. + """ + found_match = False + scan_tree_byte_value = 0 + + if data_offset < 0 or data_offset >= data_size: + raise RuntimeError(u'Invalid data offset, value out of bounds.') + + if total_data_size is not None and total_data_size < 0: + raise RuntimeError(u'Invalid total data size, value out of bounds.') + + if total_data_offset < 0 or ( + total_data_size is not None and total_data_offset >= total_data_size): + raise RuntimeError(u'Invalid total data offset, value out of bounds.') + + if (total_data_size is not None and + total_data_offset + data_size >= total_data_size): + match_on_boundary = True + else: + match_on_boundary = False + + data_offset += self.pattern_offset + + if not match_on_boundary and data_offset >= data_size: + raise RuntimeError(u'Invalid pattern offset value, out of bounds.') + + if data_offset < data_size: + data_byte_value = ord(data[data_offset]) + + for scan_tree_byte_value in self._byte_values: + if data_byte_value == scan_tree_byte_value: + found_match = True + break + + if found_match: + scan_object = self._byte_values[scan_tree_byte_value] + + logging.debug( + u'Scan tree node match at data offset: 0x{0:08x}.'.format(data_offset) + ) + + else: + scan_object = self.default_value + + if not scan_object: + scan_object = self.parent + while scan_object and not scan_object.default_value: + scan_object = scan_object.parent + + if scan_object: + scan_object = scan_object.default_value + + return scan_object + + def SetDefaultValue(self, scan_object): + """Sets the default (non-match) value. + + Args: + scan_object: the scan object, either a scan sub node or a pattern. + + Raises: + ValueError: if the default value is already set. 
+ """ + if self.default_value: + raise ValueError(u'Default value already set.') + + self.default_value = scan_object + + def ToDebugString(self, indentation_level=1): + """Converts the scan tree node into a debug string.""" + indentation = u' ' * indentation_level + + header = u'{0:s}pattern offset: {1:d}\n'.format( + indentation, self.pattern_offset) + + entries = u'' + + for byte_value in self._byte_values: + entries += u'{0:s}byte value: 0x{1:02x}\n'.format(indentation, byte_value) + + if isinstance(self._byte_values[byte_value], ScanTreeNode): + entries += u'{0:s}scan tree node:\n'.format(indentation) + entries += self._byte_values[byte_value].ToDebugString( + indentation_level + 1) + + elif isinstance(self._byte_values[byte_value], patterns.Pattern): + entries += u'{0:s}pattern: {1:s}\n'.format( + indentation, self._byte_values[byte_value].identifier) + + default = u'{0:s}default value:\n'.format(indentation) + + if isinstance(self.default_value, ScanTreeNode): + default += u'{0:s}scan tree node:\n'.format(indentation) + default += self.default_value.ToDebugString(indentation_level + 1) + + elif isinstance(self.default_value, patterns.Pattern): + default += u'{0:s}pattern: {1:s}\n'.format( + indentation, self.default_value.identifier) + + return u''.join([header, entries, default, u'\n']) diff --git a/plaso/classifier/scan_tree_test.py b/plaso/classifier/scan_tree_test.py new file mode 100644 index 0000000..e3227b5 --- /dev/null +++ b/plaso/classifier/scan_tree_test.py @@ -0,0 +1,74 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains tests for the scan tree classes.""" + +import unittest + +from plaso.classifier import patterns +from plaso.classifier import scan_tree +from plaso.classifier import specification + + +class ScanTreeNodeTest(unittest.TestCase): + """Class to test the scan tree node.""" + + def testAddByteValueWithPattern(self): + """Function to test the add byte value with pattern function.""" + scan_node = scan_tree.ScanTreeNode(0) + + format_regf = specification.Specification('REGF') + format_regf.AddNewSignature('regf', offset=0) + + format_esedb = specification.Specification('ESEDB') + format_esedb.AddNewSignature('\xef\xcd\xab\x89', offset=4) + + signature_esedb = specification.Signature('\xef\xcd\xab\x89', offset=4) + signature_regf = specification.Signature('regf', offset=0) + + pattern_regf = patterns.Pattern(0, signature_regf, format_regf) + pattern_esedb = patterns.Pattern(0, signature_esedb, format_esedb) + + scan_node.AddByteValue('r', pattern_regf) + scan_node.AddByteValue('\xef', pattern_esedb) + + self.assertRaises( + ValueError, scan_node.AddByteValue, 'r', pattern_regf) + self.assertRaises( + ValueError, scan_node.AddByteValue, -1, pattern_regf) + self.assertRaises( + ValueError, scan_node.AddByteValue, 256, pattern_regf) + + def testAddByteValueWithScanNode(self): + """Function to test the add byte value with scan node function.""" + scan_node = scan_tree.ScanTreeNode(0) + scan_sub_node_0x41 = scan_tree.ScanTreeNode(1) + scan_sub_node_0x80 = scan_tree.ScanTreeNode(1) + + scan_node.AddByteValue(0x41, scan_sub_node_0x41) + scan_node.AddByteValue(0x80, scan_sub_node_0x80) + + self.assertRaises( + ValueError, scan_node.AddByteValue, 0x80, scan_sub_node_0x80) + self.assertRaises( + ValueError, scan_node.AddByteValue, -1, scan_sub_node_0x80) + self.assertRaises( + ValueError, scan_node.AddByteValue, 256, scan_sub_node_0x80) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/classifier/scanner.py b/plaso/classifier/scanner.py new file mode 100644 index 0000000..b18582d --- /dev/null +++ b/plaso/classifier/scanner.py @@ -0,0 +1,749 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the classes for a scan tree-based format scanner.""" + +import logging +import os + +from plaso.classifier import patterns +from plaso.classifier import range_list +from plaso.classifier import scan_tree + + +class _ScanMatch(object): + """Class that implements a scan match.""" + + def __init__(self, total_data_offset, pattern): + """Initializes the scan result. + + Args: + total_data_offset: the offset of the resulting match relative + to the start of the total data scanned. + pattern: the pattern matched. 
+ """ + super(_ScanMatch, self).__init__() + self.total_data_offset = total_data_offset + self.pattern = pattern + + @property + def specification(self): + """The specification.""" + return self.pattern.specification + + +class _ScanResult(object): + """Class that implements a scan result.""" + + def __init__(self, specification): + """Initializes the scan result. + + Args: + scan_tree_node: the corresponding scan tree node or None. + """ + super(_ScanResult, self).__init__() + self.specification = specification + self.scan_matches = [] + + @property + def identifier(self): + """The specification identifier.""" + return self.specification.identifier + + +class ScanState(object): + """Class that implements a scan state.""" + + # The state definitions. + _SCAN_STATE_START = 1 + _SCAN_STATE_SCANNING = 2 + _SCAN_STATE_STOP = 3 + + def __init__(self, scan_tree_node, total_data_size=None): + """Initializes the scan state. + + Args: + scan_tree_node: the corresponding scan tree node or None. + total_data_size: optional value to indicate the total data size. + The default is None. + """ + super(ScanState, self).__init__() + self._matches = [] + self.remaining_data = None + self.remaining_data_size = 0 + self.scan_tree_node = scan_tree_node + self.state = self._SCAN_STATE_START + self.total_data_offset = 0 + self.total_data_size = total_data_size + + def AddMatch(self, total_data_offset, pattern): + """Adds a result to the state to scanning. + + Args: + total_data_offset: the offset of the resulting match relative + to the start total data scanned. + pattern: the pattern matched. + + Raises: + RuntimeError: when a unsupported state is encountered. + """ + if (self.state != self._SCAN_STATE_START and + self.state != self._SCAN_STATE_SCANNING): + raise RuntimeError(u'Unsupported scan state.') + + self._matches.append(_ScanMatch(total_data_offset, pattern)) + + def GetMatches(self): + """Retrieves a list containing the results. + + Returns: + A list of scan matches (instances of _ScanMatch). + + Raises: + RuntimeError: when a unsupported state is encountered. + """ + if self.state != self._SCAN_STATE_STOP: + raise RuntimeError(u'Unsupported scan state.') + + return self._matches + + def Reset(self, scan_tree_node): + """Resets the state to start. + + This function will clear the remaining data. + + Args: + scan_tree_node: the corresponding scan tree node or None. + + Raises: + RuntimeError: when a unsupported state is encountered. + """ + if self.state != self._SCAN_STATE_STOP: + raise RuntimeError(u'Unsupported scan state.') + + self.remaining_data = None + self.remaining_data_size = 0 + self.scan_tree_node = scan_tree_node + self.state = self._SCAN_STATE_START + + def Scanning(self, scan_tree_node, total_data_offset): + """Sets the state to scanning. + + Args: + scan_tree_node: the active scan tree node. + total_data_offset: the offset of the resulting match relative + to the start of the total data scanned. + + Raises: + RuntimeError: when a unsupported state is encountered. + """ + if (self.state != self._SCAN_STATE_START and + self.state != self._SCAN_STATE_SCANNING): + raise RuntimeError(u'Unsupported scan state.') + + self.scan_tree_node = scan_tree_node + self.state = self._SCAN_STATE_SCANNING + self.total_data_offset = total_data_offset + + def Stop(self): + """Sets the state to stop. + + Raises: + RuntimeError: when a unsupported state is encountered. 
+ """ + if (self.state != self._SCAN_STATE_START and + self.state != self._SCAN_STATE_SCANNING): + raise RuntimeError(u'Unsupported scan state.') + + self.scan_tree_node = None + self.state = self._SCAN_STATE_STOP + + +class ScanTreeScannerBase(object): + """Class that implements a scan tree-based scanner base.""" + + def __init__(self, specification_store): + """Initializes the scanner. + + Args: + specification_store: the specification store (instance of + SpecificationStore) that contains the format + specifications. + """ + super(ScanTreeScannerBase, self).__init__() + self._scan_tree = None + self._specification_store = specification_store + + def _ScanBufferScanState( + self, scan_tree_object, scan_state, data, data_size, total_data_offset, + total_data_size=None): + """Scans a buffer using the scan tree. + + This function implements a Boyer–Moore–Horspool equivalent approach + in combination with the scan tree. + + Args: + scan_tree_object: the scan tree (instance of ScanTree). + scan_state: the scan state (instance of ScanState). + data: a buffer containing raw data. + data_size: the size of the raw data in the buffer. + total_data_offset: the offset of the data relative to the start of + the total data scanned. + total_data_size: optional value to indicate the total data size. + The default is None. + + Raises: + RuntimeError: if the total data offset, total data size or the last + pattern offset value is out of bounds + """ + if total_data_size is not None and total_data_size < 0: + raise RuntimeError(u'Invalid total data size, value out of bounds.') + + if total_data_offset < 0 or ( + total_data_size is not None and total_data_offset >= total_data_size): + raise RuntimeError(u'Invalid total data offset, value out of bounds.') + + data_offset = 0 + scan_tree_node = scan_state.scan_tree_node + + if scan_state.remaining_data: + # str.join() should be more efficient then concatenation by +. 
+      data = ''.join([scan_state.remaining_data, data])
+      data_size += scan_state.remaining_data_size
+
+      scan_state.remaining_data = None
+      scan_state.remaining_data_size = 0
+
+    if (total_data_size is not None and
+        total_data_offset + data_size >= total_data_size):
+      match_on_boundary = True
+    else:
+      match_on_boundary = False
+
+    while data_offset < data_size:
+      if (not match_on_boundary and
+          data_offset + scan_tree_object.largest_length >= data_size):
+        break
+
+      found_match = False
+      scan_done = False
+
+      while not scan_done:
+        scan_object = scan_tree_node.CompareByteValue(
+            data, data_offset, data_size, total_data_offset,
+            total_data_size=total_data_size)
+
+        if isinstance(scan_object, scan_tree.ScanTreeNode):
+          scan_tree_node = scan_object
+        else:
+          scan_done = True
+
+      if isinstance(scan_object, patterns.Pattern):
+        pattern_length = len(scan_object.signature.expression)
+        data_last_offset = data_offset + pattern_length
+
+        if cmp(scan_object.signature.expression,
+               data[data_offset:data_last_offset]) == 0:
+
+          if (not scan_object.signature.is_bound or
+              scan_object.signature.offset == data_offset):
+            found_match = True
+
+            logging.debug(
+                u'Signature match at data offset: 0x{0:08x}.'.format(
+                    data_offset))
+
+            scan_state.AddMatch(total_data_offset + data_offset, scan_object)
+
+      if found_match:
+        skip_value = len(scan_object.signature.expression)
+        scan_tree_node = scan_tree_object.root_node
+      else:
+        last_pattern_offset = (
+            scan_tree_object.skip_table.skip_pattern_length - 1)
+
+        if data_offset + last_pattern_offset >= data_size:
+          raise RuntimeError(
+              u'Invalid last pattern offset, value out of bounds.')
+        skip_value = 0
+
+        while last_pattern_offset >= 0 and not skip_value:
+          last_data_offset = data_offset + last_pattern_offset
+          byte_value = ord(data[last_data_offset])
+          skip_value = scan_tree_object.skip_table[byte_value]
+          last_pattern_offset -= 1
+
+        if not skip_value:
+          skip_value = 1
+
+        scan_tree_node = scan_tree_object.root_node
+
+      data_offset += skip_value
+
+    if not match_on_boundary and data_offset < data_size:
+      scan_state.remaining_data = data[data_offset:data_size]
+      scan_state.remaining_data_size = data_size - data_offset
+
+    scan_state.Scanning(scan_tree_node, total_data_offset + data_offset)
+
+  def _ScanBufferScanStateFinal(self, scan_tree_object, scan_state):
+    """Scans the remaining data in the scan state using the scan tree.
+
+    Args:
+      scan_tree_object: the scan tree (instance of ScanTree).
+      scan_state: the scan state (instance of ScanState).
+    """
+    if scan_state.remaining_data:
+      data = scan_state.remaining_data
+      data_size = scan_state.remaining_data_size
+
+      scan_state.remaining_data = None
+      scan_state.remaining_data_size = 0
+
+      # Setting the total data size ensures that boundary matches are
+      # returned in this scanning pass.
+      total_data_size = scan_state.total_data_size
+      if total_data_size is None:
+        total_data_size = scan_state.total_data_offset + data_size
+
+      self._ScanBufferScanState(
+          scan_tree_object, scan_state, data, data_size,
+          scan_state.total_data_offset, total_data_size=total_data_size)
+
+    scan_state.Stop()
+
+  def GetScanResults(self, scan_state):
+    """Retrieves the scan results.
+
+    Args:
+      scan_state: the scan state (instance of ScanState).
+
+    Returns:
+      A list of scan results (instances of _ScanResult).
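+
+    Multiple scan matches for the same format specification are combined
+    into a single scan result.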
+ """ + scan_results = {} + + for scan_match in scan_state.GetMatches(): + specification = scan_match.specification + identifier = specification.identifier + + logging.debug( + u'Scan match at offset: 0x{0:08x} specification: {1:s}'.format( + scan_match.total_data_offset, identifier)) + + if identifier not in scan_results: + scan_results[identifier] = _ScanResult(specification) + + scan_results[identifier].scan_matches.append(scan_match) + + return scan_results.values() + + +class Scanner(ScanTreeScannerBase): + """Class that implements a scan tree-based scanner.""" + + _READ_BUFFER_SIZE = 512 + + def __init__(self, specification_store): + """Initializes the scanner. + + Args: + specification_store: the specification store (instance of + SpecificationStore) that contains the format + specifications. + """ + super(Scanner, self).__init__(specification_store) + + def ScanBuffer(self, scan_state, data, data_size): + """Scans a buffer. + + Args: + scan_state: the scan state (instance of ScanState). + data: a buffer containing raw data. + data_size: the size of the raw data in the buffer. + """ + self._ScanBufferScanState( + self._scan_tree, scan_state, data, data_size, + scan_state.total_data_offset, + total_data_size=scan_state.total_data_size) + + def ScanFileObject(self, file_object): + """Scans a file-like object. + + Args: + file_object: a file-like object. + + Returns: + A list of scan results (instances of ScanResult). + """ + file_offset = 0 + + if hasattr(file_object, 'get_size'): + file_size = file_object.get_size() + else: + file_object.seek(0, os.SEEK_END) + file_size = file_object.tell() + + scan_state = self.StartScan(total_data_size=file_size) + + file_object.seek(file_offset, os.SEEK_SET) + + while file_offset < file_size: + data = file_object.read(self._READ_BUFFER_SIZE) + data_size = len(data) + + if data_size == 0: + break + + self._ScanBufferScanState( + self._scan_tree, scan_state, data, data_size, file_offset, + total_data_size=file_size) + + file_offset += data_size + + self.StopScan(scan_state) + + return self.GetScanResults(scan_state) + + def StartScan(self, total_data_size=None): + """Starts a scan. + + The function sets up the scanning related structures if necessary. + + Args: + total_data_size: optional value to indicate the total data size. + The default is None. + Returns: + A scan state (instance of ScanState). + + Raises: + RuntimeError: when total data size is invalid. + """ + if total_data_size is not None and total_data_size < 0: + raise RuntimeError(u'Invalid total data size.') + + if self._scan_tree is None: + self._scan_tree = scan_tree.ScanTree( + self._specification_store, None) + + return ScanState(self._scan_tree.root_node, total_data_size=total_data_size) + + def StopScan(self, scan_state): + """Stops a scan. + + Args: + scan_state: the scan state (instance of ScanState). + """ + self._ScanBufferScanStateFinal(self._scan_tree, scan_state) + + +class OffsetBoundScanner(ScanTreeScannerBase): + """Class that implements an offset-bound scan tree-based scanner.""" + + _READ_BUFFER_SIZE = 512 + + def __init__(self, specification_store): + """Initializes the scanner. + + Args: + specification_store: the specification store (instance of + SpecificationStore) that contains the format + specifications. 
+ """ + super(OffsetBoundScanner, self).__init__(specification_store) + self._footer_scan_tree = None + self._footer_spanning_range = None + self._header_scan_tree = None + self._header_spanning_range = None + + def _GetFooterRange(self, total_data_size): + """Retrieves the read buffer aligned footer range. + + Args: + total_data_size: optional value to indicate the total data size. + The default is None. + Returns: + A range (instance of Range). + """ + # The actual footer range is in reverse since the spanning footer range + # is based on positive offsets, where 0 is the end of file. + if self._footer_spanning_range.end_offset < total_data_size: + footer_range_start_offset = ( + total_data_size - self._footer_spanning_range.end_offset) + else: + footer_range_start_offset = 0 + + # Calculate the lower bound modulus of the footer range start offset + # in increments of the read buffer size. + footer_range_start_offset /= self._READ_BUFFER_SIZE + footer_range_start_offset *= self._READ_BUFFER_SIZE + + # Calculate the upper bound modulus of the footer range size + # in increments of the read buffer size. + footer_range_size = self._footer_spanning_range.size + remainder = footer_range_size % self._READ_BUFFER_SIZE + footer_range_size /= self._READ_BUFFER_SIZE + + if remainder > 0: + footer_range_size += 1 + + footer_range_size *= self._READ_BUFFER_SIZE + + return range_list.Range(footer_range_start_offset, footer_range_size) + + def _GetHeaderRange(self): + """Retrieves the read buffer aligned header range. + + Returns: + A range (instance of Range). + """ + # Calculate the lower bound modulus of the header range start offset + # in increments of the read buffer size. + header_range_start_offset = self._header_spanning_range.start_offset + header_range_start_offset /= self._READ_BUFFER_SIZE + header_range_start_offset *= self._READ_BUFFER_SIZE + + # Calculate the upper bound modulus of the header range size + # in increments of the read buffer size. + header_range_size = self._header_spanning_range.size + remainder = header_range_size % self._READ_BUFFER_SIZE + header_range_size /= self._READ_BUFFER_SIZE + + if remainder > 0: + header_range_size += 1 + + header_range_size *= self._READ_BUFFER_SIZE + + return range_list.Range(header_range_start_offset, header_range_size) + + def _ScanBufferScanState( + self, scan_tree_object, scan_state, data, data_size, total_data_offset, + total_data_size=None): + """Scans a buffer using the scan tree. + + This function implements a Boyer–Moore–Horspool equivalent approach + in combination with the scan tree. + + Args: + scan_tree_object: the scan tree (instance of ScanTree). + scan_state: the scan state (instance of ScanState). + data: a buffer containing raw data. + data_size: the size of the raw data in the buffer. + total_data_offset: the offset of the data relative to the start of + the total data scanned. + total_data_size: optional value to indicate the total data size. + The default is None. 
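+
+    Example (editorial sketch, not part of the original code): a signature
+    with expression 'regf' bound to offset 0 yields a comparison
+    equivalent to:
+
+      data[0:4] == 'regf'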
+    """
+    scan_done = False
+    scan_tree_node = scan_tree_object.root_node
+
+    while not scan_done:
+      data_offset = 0
+
+      scan_object = scan_tree_node.CompareByteValue(
+          data, data_offset, data_size, total_data_offset,
+          total_data_size=total_data_size)
+
+      if isinstance(scan_object, scan_tree.ScanTreeNode):
+        scan_tree_node = scan_object
+      else:
+        scan_done = True
+
+    if isinstance(scan_object, patterns.Pattern):
+      pattern_length = len(scan_object.signature.expression)
+      pattern_start_offset = scan_object.signature.offset
+      pattern_end_offset = pattern_start_offset + pattern_length
+
+      if cmp(scan_object.signature.expression,
+             data[pattern_start_offset:pattern_end_offset]) == 0:
+        scan_state.AddMatch(
+            total_data_offset + scan_object.signature.offset, scan_object)
+
+        logging.debug(
+            u'Signature match at data offset: 0x{0:08x}.'.format(data_offset))
+
+  # TODO: implement.
+  # def ScanBuffer(self, scan_state, data, data_size):
+  #   """Scans a buffer.
+
+  #   Args:
+  #     scan_state: the scan state (instance of ScanState).
+  #     data: a buffer containing raw data.
+  #     data_size: the size of the raw data in the buffer.
+  #   """
+  #   # TODO: fix footer scanning logic.
+  #   # need to know the file size here for the footers.
+
+  #   # TODO: check for clashing ranges?
+
+  #   header_range = self._GetHeaderRange()
+  #   footer_range = self._GetFooterRange(scan_state.total_data_size)
+
+  #   if self._scan_tree == self._header_scan_tree:
+  #     if (scan_state.total_data_offset >= header_range.start_offset and
+  #         scan_state.total_data_offset < header_range.end_offset):
+  #       self._ScanBufferScanState(
+  #           self._scan_tree, scan_state, data, data_size,
+  #           scan_state.total_data_offset,
+  #           total_data_size=scan_state.total_data_size)
+
+  #     elif scan_state.total_data_offset > header_range.end_offset:
+  #       # TODO: implement.
+  #       pass
+
+  #   if self._scan_tree == self._footer_scan_tree:
+  #     if (scan_state.total_data_offset >= footer_range.start_offset and
+  #         scan_state.total_data_offset < footer_range.end_offset):
+  #       self._ScanBufferScanState(
+  #           self._scan_tree, scan_state, data, data_size,
+  #           scan_state.total_data_offset,
+  #           total_data_size=scan_state.total_data_size)
+
+  def ScanFileObject(self, file_object):
+    """Scans a file-like object.
+
+    Args:
+      file_object: a file-like object.
+
+    Returns:
+      A list of scan results (instances of _ScanResult).
+    """
+    # TODO: add support for fixed size block-based reads.
+
+    if hasattr(file_object, 'get_size'):
+      file_size = file_object.get_size()
+    else:
+      file_object.seek(0, os.SEEK_END)
+      file_size = file_object.tell()
+
+    file_offset = 0
+    scan_state = self.StartScan(total_data_size=file_size)
+
+    if self._header_scan_tree.root_node is not None:
+      header_range = self._GetHeaderRange()
+
+      # TODO: optimize the read by supporting fixed size block-based reads.
+      # if file_offset < header_range.start_offset:
+      #   file_offset = header_range.start_offset
+
+      file_object.seek(file_offset, os.SEEK_SET)
+
+      # TODO: optimize the read by supporting fixed size block-based reads.
+      # data = file_object.read(header_range.size)
+      data = file_object.read(header_range.end_offset)
+      data_size = len(data)
+
+      if data_size > 0:
+        self._ScanBufferScanState(
+            self._scan_tree, scan_state, data, data_size, file_offset,
+            total_data_size=file_size)
+
+      file_offset += data_size
+
+      if self._footer_scan_tree.root_node is not None:
+        self.StopScan(scan_state)
+
+        self._scan_tree = self._footer_scan_tree
+        scan_state.Reset(self._scan_tree.root_node)
+
+    if self._footer_scan_tree.root_node is not None:
+      footer_range = self._GetFooterRange(file_size)
+
+      # Note that the offsets in the footer scan tree start at 0. Make sure
+      # the data offset of the data being scanned is aligned with the offset
+      # in the scan tree.
+      if footer_range.start_offset < self._footer_spanning_range.end_offset:
+        data_offset = (
+            self._footer_spanning_range.end_offset - footer_range.start_offset)
+      else:
+        data_offset = 0
+
+      if file_offset < footer_range.start_offset:
+        file_offset = footer_range.start_offset
+
+      file_object.seek(file_offset, os.SEEK_SET)
+
+      data = file_object.read(self._READ_BUFFER_SIZE)
+      data_size = len(data)
+
+      if data_size > 0:
+        self._ScanBufferScanState(
+            self._scan_tree, scan_state, data[data_offset:],
+            data_size - data_offset, file_offset + data_offset,
+            total_data_size=file_size)
+
+    self.StopScan(scan_state)
+
+    return self.GetScanResults(scan_state)
+
+  def StartScan(self, total_data_size=None):
+    """Starts a scan.
+
+    The function sets up the scanning related structures if necessary.
+
+    Args:
+      total_data_size: value to indicate the total data size, which is
+                       required by this scanner.
+
+    Returns:
+      A scan state (instance of ScanState).
+
+    Raises:
+      RuntimeError: when total data size is invalid.
+    """
+    if total_data_size is None or total_data_size < 0:
+      raise RuntimeError(u'Invalid total data size.')
+
+    if self._header_scan_tree is None:
+      self._header_scan_tree = scan_tree.ScanTree(
+          self._specification_store, True,
+          offset_mode=scan_tree.ScanTree.OFFSET_MODE_POSITIVE)
+
+    if self._header_spanning_range is None:
+      spanning_range = self._header_scan_tree.range_list.GetSpanningRange()
+      self._header_spanning_range = spanning_range
+
+    if self._footer_scan_tree is None:
+      self._footer_scan_tree = scan_tree.ScanTree(
+          self._specification_store, True,
+          offset_mode=scan_tree.ScanTree.OFFSET_MODE_NEGATIVE)
+
+    if self._footer_spanning_range is None:
+      spanning_range = self._footer_scan_tree.range_list.GetSpanningRange()
+      self._footer_spanning_range = spanning_range
+
+    if self._header_scan_tree.root_node is not None:
+      self._scan_tree = self._header_scan_tree
+    elif self._footer_scan_tree.root_node is not None:
+      self._scan_tree = self._footer_scan_tree
+    else:
+      self._scan_tree = None
+
+    if self._scan_tree is not None:
+      root_node = self._scan_tree.root_node
+    else:
+      root_node = None
+
+    return ScanState(root_node, total_data_size=total_data_size)
+
+  def StopScan(self, scan_state):
+    """Stops a scan.
+
+    Args:
+      scan_state: the scan state (instance of ScanState).
+    """
+    self._ScanBufferScanStateFinal(self._scan_tree, scan_state)
+    self._scan_tree = None
diff --git a/plaso/classifier/scanner_test.py b/plaso/classifier/scanner_test.py
new file mode 100644
index 0000000..32098f5
--- /dev/null
+++ b/plaso/classifier/scanner_test.py
@@ -0,0 +1,119 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains tests for the format scanner classes.""" + +import unittest + +from plaso.classifier import scanner +from plaso.classifier import test_lib + + +class ScannerTest(unittest.TestCase): + """Class to test the scanner.""" + + def testInitialize(self): + """Function to test the initialize function.""" + store = test_lib.CreateSpecificationStore() + + # Signature for LNK + data1 = ('\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00' + '\x00\x00\x00\x46') + + # Signature for REGF + data2 = 'regf' + + # Random data + data3 = '\x01\xfa\xe0\xbe\x99\x8e\xdb\x70\xea\xcc\x6b\xae\x2f\xf5\xa2\xe4' + + # Boundary scan test + data4a = ('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00PK') + data4b = ('\x07\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Z') + + # Large buffer test + data5_size = 1024 * 1024 + data5 = '\x00' * (data5_size - 4) + data5 += 'PK\x07\x08' + + test_scanner = scanner.Scanner(store) + + total_data_size = len(data1) + scan_state = test_scanner.StartScan(total_data_size=total_data_size) + test_scanner.ScanBuffer(scan_state, data1, len(data1)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 1) + + scan_state = test_scanner.StartScan(total_data_size=None) + test_scanner.ScanBuffer(scan_state, data1, len(data1)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 1) + + total_data_size = len(data2) + scan_state = test_scanner.StartScan(total_data_size=total_data_size) + test_scanner.ScanBuffer(scan_state, data2, len(data2)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 1) + + scan_state = test_scanner.StartScan(total_data_size=None) + test_scanner.ScanBuffer(scan_state, data2, len(data2)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 1) + + total_data_size = len(data3) + scan_state = test_scanner.StartScan(total_data_size=total_data_size) + test_scanner.ScanBuffer(scan_state, data3, len(data3)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 0) + + scan_state = test_scanner.StartScan(total_data_size=None) + test_scanner.ScanBuffer(scan_state, data3, len(data3)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 0) + + total_data_size = len(data4a) + len(data4b) + scan_state = test_scanner.StartScan(total_data_size=total_data_size) + test_scanner.ScanBuffer(scan_state, data4a, len(data4a)) + test_scanner.ScanBuffer(scan_state, data4b, len(data4b)) + test_scanner.StopScan(scan_state) + + self.assertEqual(len(scan_state.GetMatches()), 1) + + scan_state = test_scanner.StartScan(total_data_size=None) + test_scanner.ScanBuffer(scan_state, data4a, len(data4a)) + test_scanner.ScanBuffer(scan_state, data4b, len(data4b)) + 
test_scanner.StopScan(scan_state)
+
+    self.assertEqual(len(scan_state.GetMatches()), 1)
+
+    total_data_size = len(data5)
+    scan_state = test_scanner.StartScan(total_data_size=total_data_size)
+    test_scanner.ScanBuffer(scan_state, data5, len(data5))
+    test_scanner.StopScan(scan_state)
+
+    self.assertEqual(len(scan_state.GetMatches()), 1)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/classifier/specification.py b/plaso/classifier/specification.py
new file mode 100644
index 0000000..2e37fbe
--- /dev/null
+++ b/plaso/classifier/specification.py
@@ -0,0 +1,156 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The format specification classes."""
+
+
+class Signature(object):
+  """Class that defines a signature of a format specification.
+
+  The signature consists of a byte string expression, an optional
+  offset relative to the start of the data, and a value to indicate
+  if the expression is bound to the offset.
+  """
+  def __init__(self, expression, offset=None, is_bound=False):
+    """Initializes the signature.
+
+    Args:
+      expression: string containing the expression of the signature.
+                  The expression consists of a byte string; at the moment
+                  regular expressions (regexp) are not supported.
+      offset: the offset of the signature or None by default. None is used
+              to indicate the signature has no offset. A positive offset
+              is relative to the start of the data, a negative offset
+              is relative to the end of the data.
+      is_bound: boolean value to indicate the signature must be bound to
+                the offset or False by default.
+    """
+    self.expression = expression
+    self.offset = offset
+    self.is_bound = is_bound
+
+
+class Specification(object):
+  """Class that contains a format specification."""
+
+  def __init__(self, identifier):
+    """Initializes the specification.
+
+    Args:
+      identifier: string containing a unique name for the format.
+    """
+    self.identifier = identifier
+    self.mime_types = []
+    self.signatures = []
+    self.universal_type_identifiers = []
+
+  def AddMimeType(self, mime_type):
+    """Adds a MIME type."""
+    self.mime_types.append(mime_type)
+
+  def AddNewSignature(self, expression, offset=None, is_bound=False):
+    """Adds a signature.
+
+    Args:
+      expression: string containing the expression of the signature.
+      offset: the offset of the signature or None by default. None is used
+              to indicate the signature has no offset. A positive offset
+              is relative to the start of the data, a negative offset
+              is relative to the end of the data.
+      is_bound: boolean value to indicate the signature must be bound to
+                the offset or False by default.
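+
+    Example (editorial sketch, not part of the original code):
+
+      specification = Specification('regf')
+      specification.AddNewSignature('regf', offset=0, is_bound=True)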
+    """
+    self.signatures.append(
+        Signature(expression, offset=offset, is_bound=is_bound))
+
+  def AddUniversalTypeIdentifier(self, universal_type_identifier):
+    """Adds a Universal Type Identifier (UTI)."""
+    self.universal_type_identifiers.append(universal_type_identifier)
+
+
+class SpecificationStore(object):
+  """Class that serves as a store for specifications."""
+
+  def __init__(self):
+    """Initializes the specification store."""
+    self._format_specifications = {}
+
+  @property
+  def specifications(self):
+    """A specifications iterator object."""
+    return self._format_specifications.itervalues()
+
+  def AddNewSpecification(self, identifier):
+    """Adds a new specification.
+
+    Args:
+      identifier: a string containing the format identifier,
+                  which should be unique for the store.
+
+    Returns:
+      An instance of Specification.
+
+    Raises:
+      ValueError: if the store already contains a specification with
+                  the same identifier.
+    """
+    if identifier in self._format_specifications:
+      raise ValueError(
+          u'Specification {0:s} is already defined in store.'.format(
+              identifier))
+
+    self._format_specifications[identifier] = Specification(identifier)
+
+    return self._format_specifications[identifier]
+
+  def AddSpecification(self, specification):
+    """Adds a specification.
+
+    Args:
+      specification: the specification (instance of Specification).
+
+    Raises:
+      KeyError: if the store already contains a specification with
+                the same identifier.
+    """
+    if specification.identifier in self._format_specifications:
+      raise KeyError(
+          u'Specification {0:s} is already defined in store.'.format(
+              specification.identifier))
+
+    self._format_specifications[specification.identifier] = specification
+
+  def ReadFromFileObject(self, unused_file_object):
+    """Reads the specification store from a file-like object.
+
+    Args:
+      unused_file_object: A file-like object.
+
+    Raises:
+      RuntimeError: because functionality is not implemented yet.
+    """
+    # TODO: implement this function.
+    raise RuntimeError(u'Function not implemented.')
+
+  def ReadFromFile(self, filename):
+    """Reads the specification store from a file.
+
+    Args:
+      filename: The name of the file.
+    """
+    file_object = open(filename, 'r')
+    self.ReadFromFileObject(file_object)
+    file_object.close()
diff --git a/plaso/classifier/specification_test.py b/plaso/classifier/specification_test.py
new file mode 100644
index 0000000..6e578bb
--- /dev/null
+++ b/plaso/classifier/specification_test.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the format specification classes.""" + +import unittest + +from plaso.classifier import specification + + +class SpecificationStoreTest(unittest.TestCase): + """Class to test the specification store.""" + + def testAddSpecification(self): + """Function to test the add specification function.""" + store = specification.SpecificationStore() + + format_regf = specification.Specification('REGF') + format_regf.AddNewSignature('regf', offset=0) + + format_esedb = specification.Specification('ESEDB') + format_esedb.AddNewSignature('\xef\xcd\xab\x89', offset=4) + + store.AddSpecification(format_regf) + store.AddSpecification(format_esedb) + + with self.assertRaises(KeyError): + store.AddSpecification(format_regf) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/classifier/test_lib.py b/plaso/classifier/test_lib.py new file mode 100644 index 0000000..d0fc964 --- /dev/null +++ b/plaso/classifier/test_lib.py @@ -0,0 +1,113 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Shared test cases.""" + +from plaso.classifier import specification + + +def CreateSpecificationStore(): + """Creates a format specification store for testing purposes. + + Returns: + A format specification store (instance of SpecificationStore). 
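+
+  Example (editorial sketch, not part of the original code; assumes the
+  plaso.classifier.scanner module is imported as scanner):
+
+    store = CreateSpecificationStore()
+    scanner_object = scanner.Scanner(store)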
+  """
+  store = specification.SpecificationStore()
+
+  test_specification = store.AddNewSpecification('7zip')
+  test_specification.AddMimeType('application/x-7z-compressed')
+  test_specification.AddUniversalTypeIdentifier('org.7-zip.7-zip-archive')
+  test_specification.AddNewSignature('7z\xbc\xaf\x27\x1c', offset=0)
+
+  test_specification = store.AddNewSpecification('esedb')
+  test_specification.AddNewSignature(
+      '\xef\xcd\xab\x89', offset=4, is_bound=True)
+
+  test_specification = store.AddNewSpecification('evt')
+  test_specification.AddNewSignature(
+      '\x30\x00\x00\x00LfLe\x01\x00\x00\x00\x01\x00\x00\x00', offset=0,
+      is_bound=True)
+
+  test_specification = store.AddNewSpecification('evtx')
+  test_specification.AddNewSignature('ElfFile\x00', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('ewf')
+  test_specification.AddNewSignature(
+      'EVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('ewf_logical')
+  test_specification.AddNewSignature(
+      'LVF\x09\x0d\x0a\xff\x00', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('lnk')
+  test_specification.AddNewSignature(
+      '\x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00'
+      '\x00\x00\x00\x46', offset=0)
+
+  test_specification = store.AddNewSpecification('msiecf_index_dat')
+  test_specification.AddNewSignature(
+      'Client UrlCache MMF Ver ', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('nk2')
+  test_specification.AddNewSignature(
+      '\x0d\xf0\xad\xba\xa0\x00\x00\x00\x01\x00\x00\x00', offset=0,
+      is_bound=True)
+
+  test_specification = store.AddNewSpecification('olecf')
+  test_specification.AddNewSignature(
+      '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', offset=0, is_bound=True)
+  test_specification.AddNewSignature(
+      '\x0e\x11\xfc\x0d\xd0\xcf\x11\x0e', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('pff')
+  test_specification.AddNewSignature('!BDN', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('qcow')
+  test_specification.AddNewSignature('QFI\xfb', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('rar')
+  test_specification.AddMimeType('application/x-rar-compressed')
+  test_specification.AddUniversalTypeIdentifier('com.rarlab.rar-archive')
+  test_specification.AddNewSignature(
+      'Rar!\x1a\x07\x00', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('regf')
+  test_specification.AddNewSignature('regf', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('thumbache_db_cache')
+  test_specification.AddNewSignature('CMMM', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('thumbache_db_index')
+  test_specification.AddNewSignature('IMMM', offset=0, is_bound=True)
+
+  test_specification = store.AddNewSpecification('zip')
+  test_specification.AddMimeType('application/zip')
+  test_specification.AddUniversalTypeIdentifier('com.pkware.zip-archive')
+  # WinZip 8 signature.
+  test_specification.AddNewSignature('PK00', offset=0, is_bound=True)
+  test_specification.AddNewSignature('PK\x01\x02')
+  test_specification.AddNewSignature('PK\x03\x04', offset=0)
+  test_specification.AddNewSignature('PK\x05\x05')
+  # Will be at offset 0 when the archive is empty.
+  test_specification.AddNewSignature('PK\x05\x06', offset=-22, is_bound=True)
+  test_specification.AddNewSignature('PK\x06\x06')
+  test_specification.AddNewSignature('PK\x06\x07')
+  test_specification.AddNewSignature('PK\x06\x08')
+  # Will be at offset 0 when this is a spanned archive.
+  test_specification.AddNewSignature('PK\x07\x08')
+
+  return store
diff --git a/plaso/engine/__init__.py b/plaso/engine/__init__.py
new file mode 100644
index 0000000..f462564
--- /dev/null
+++ b/plaso/engine/__init__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/plaso/engine/classifier.py b/plaso/engine/classifier.py
new file mode 100644
index 0000000..301bb36
--- /dev/null
+++ b/plaso/engine/classifier.py
@@ -0,0 +1,202 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The file format classifier."""
+
+# TODO: rewrite most of the classifier in C and integrate with the code in:
+# plaso/classifier
+
+import gzip
+import logging
+import os
+import tarfile
+import zipfile
+import zlib
+
+from dfvfs.lib import definitions
+from dfvfs.path import factory as path_spec_factory
+from dfvfs.resolver import resolver as path_spec_resolver
+
+from plaso.lib import errors
+
+
+class Classifier(object):
+  """Class that defines the file format classifier."""
+
+  _MAGIC_VALUES = {
+      'ZIP': {'length': 4, 'offset': 0, 'values': ['P', 'K', '\x03', '\x04']},
+      'TAR': {'length': 5, 'offset': 257, 'values': ['u', 's', 't', 'a', 'r']},
+      'GZ': {'length': 2, 'offset': 0, 'values': ['\x1f', '\x8b']},
+  }
+
+  # TODO: Remove this logic when the classifier is ready.
+  # This is only used temporarily until files can be classified.
+  magic_max_length = 0
+
+  # Defines the maximum depth into a file (for SmartOpenFiles).
+  MAX_FILE_DEPTH = 3
+
+  @classmethod
+  def _SmartOpenFile(cls, file_entry):
+    """Returns a generator of path specifications extracted from a file.
+
+    If the file is compressed then extract all members and include
+    them into the processing queue.
+
+    Args:
+      file_entry: The file entry object.
+
+    Yields:
+      A path specification (instance of dfvfs.PathSpec) of embedded file
+      entries.
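+
+    Example (editorial sketch, not part of the original code):
+
+      for path_spec in Classifier._SmartOpenFile(file_entry):
+        print path_spec.comparable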
+    """
+    file_object = file_entry.GetFileObject()
+
+    # TODO: Remove when classifier gets deployed. Then we
+    # call the classifier here and use that for definition (and
+    # then we forward the classifier definition in the pathspec
+    # protobuf).
+    file_object.seek(0, os.SEEK_SET)
+
+    if not cls.magic_max_length:
+      for magic_value in cls._MAGIC_VALUES.values():
+        cls.magic_max_length = max(
+            cls.magic_max_length,
+            magic_value['length'] + magic_value['offset'])
+
+    header = file_object.read(cls.magic_max_length)
+
+    file_classification = ''
+    # Go over each and every magic value defined and compare
+    # each read byte (according to original offset and current one).
+    # If all match, then we have a particular file format and we
+    # can move on.
+    for m_value, m_dict in cls._MAGIC_VALUES.items():
+      length = m_dict['length'] + m_dict['offset']
+      if len(header) < length:
+        continue
+
+      offset = m_dict['offset']
+      magic = m_dict['values']
+
+      if header[offset:offset + len(magic)] == ''.join(magic):
+        file_classification = m_value
+        break
+
+    # TODO: refactor the file type specific code into sub functions.
+    if file_classification == 'ZIP':
+      try:
+        file_object.seek(0, os.SEEK_SET)
+        zip_file = zipfile.ZipFile(file_object, 'r')
+
+        # TODO: Make this a more "sane" check, and perhaps
+        # not entirely skip the file if it has this particular
+        # ending, but for now, this both slows the tool down
+        # considerably and makes it also more unstable.
+        _, _, filename_extension = file_entry.name.rpartition(u'.')
+
+        if filename_extension in [u'jar', u'sym', u'xpi']:
+          file_object.close()
+          logging.debug(
+              u'Unsupported ZIP sub type: {0:s} detected in file: {1:s}'.format(
+                  filename_extension, file_entry.path_spec.comparable))
+          return
+
+        for info in zip_file.infolist():
+          if info.file_size > 0:
+            logging.debug(
+                u'Including: {0:s} from ZIP into process queue.'.format(
+                    info.filename))
+
+            yield path_spec_factory.Factory.NewPathSpec(
+                definitions.TYPE_INDICATOR_ZIP, location=info.filename,
+                parent=file_entry.path_spec)
+
+      except zipfile.BadZipfile:
+        pass
+
+    elif file_classification == 'GZ':
+      try:
+        type_indicator = file_entry.path_spec.type_indicator
+        if type_indicator == definitions.TYPE_INDICATOR_GZIP:
+          raise errors.SameFileType
+
+        file_object.seek(0, os.SEEK_SET)
+        gzip_file = gzip.GzipFile(fileobj=file_object, mode='rb')
+        _ = gzip_file.read(4)
+        gzip_file.close()
+
+        logging.debug((
+            u'Including: {0:s} as GZIP compressed stream into process '
+            u'queue.').format(file_entry.name))
+
+        yield path_spec_factory.Factory.NewPathSpec(
+            definitions.TYPE_INDICATOR_GZIP, parent=file_entry.path_spec)
+
+      except (IOError, zlib.error, errors.SameFileType):
+        pass
+
+    # TODO: Add BZ2 support.
+    elif file_classification == 'TAR':
+      try:
+        file_object.seek(0, os.SEEK_SET)
+        tar_file = tarfile.open(fileobj=file_object, mode='r')
+
+        for name_info in tar_file.getmembers():
+          if not name_info.isfile():
+            continue
+
+          name = name_info.path
+          logging.debug(
+              u'Including: {0:s} from TAR into process queue.'.format(name))
+
+          yield path_spec_factory.Factory.NewPathSpec(
+              definitions.TYPE_INDICATOR_TAR, location=name,
+              parent=file_entry.path_spec)
+
+      except tarfile.ReadError:
+        pass
+
+    file_object.close()
+
+  @classmethod
+  def SmartOpenFiles(cls, file_entry, depth=0):
+    """Generates the file entries extracted from a file.
+
+    Args:
+      file_entry: A file entry object.
+ depth: Incrementing number that defines the current depth into + a file (file inside a ZIP file is depth 1, file inside a tar.gz + would be of depth 2). + + Yields: + A file entry object (instance of dfvfs.FileEntry). + """ + if depth >= cls.MAX_FILE_DEPTH: + return + + for path_spec in cls._SmartOpenFile(file_entry): + sub_file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec) + if sub_file_entry is None: + logging.debug( + u'Unable to open file: {0:s}'.format(path_spec.comparable)) + continue + yield sub_file_entry + + depth += 1 + for sub_file_entry in cls.SmartOpenFiles(sub_file_entry, depth=depth): + yield sub_file_entry diff --git a/plaso/engine/collector.py b/plaso/engine/collector.py new file mode 100644 index 0000000..e4c7af6 --- /dev/null +++ b/plaso/engine/collector.py @@ -0,0 +1,421 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Generic collector that supports both file system and image files.""" + +import hashlib +import logging +import os + +from dfvfs.helpers import file_system_searcher +from dfvfs.lib import definitions as dfvfs_definitions +from dfvfs.lib import errors as dfvfs_errors +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.engine import queue +from plaso.lib import errors + + +class Collector(queue.ItemQueueProducer): + """Class that implements a collector object.""" + + def __init__( + self, process_queue, source_path, source_path_spec, + resolver_context=None): + """Initializes the collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + process_queue: The process queue (instance of Queue). This queue contains + the file entries that need to be processed. + source_path: Path of the source file or directory. + source_path_spec: The source path specification (instance of + dfvfs.PathSpec) as determined by the file system + scanner. The default is None. + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. + """ + super(Collector, self).__init__(process_queue) + self._filter_find_specs = None + self._fs_collector = FileSystemCollector(process_queue) + self._resolver_context = resolver_context + # TODO: remove the need to pass source_path + self._source_path = os.path.abspath(source_path) + self._source_path_spec = source_path_spec + self._vss_stores = None + + def __enter__(self): + """Enters a with statement.""" + return self + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Exits a with statement.""" + return + + def _ProcessImage(self, volume_path_spec, find_specs=None): + """Processes a volume within a storage media image. 
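+
+    Editorial note, a sketch of how Collect() drives this method for a raw
+    storage media image (the OS location is hypothetical):
+
+      volume_path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_OS, location=u'/cases/image.dd')
+      self._ProcessImage(volume_path_spec, find_specs=self._filter_find_specs)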
+
+    Args:
+      volume_path_spec: The path specification of the volume containing
+                        the file system.
+      find_specs: Optional list of find specifications (instances of
+                  dfvfs.FindSpec). The default is None.
+    """
+    if find_specs:
+      logging.debug(u'Collecting from image file: {0:s} with filter'.format(
+          self._source_path))
+    else:
+      logging.debug(u'Collecting from image file: {0:s}'.format(
+          self._source_path))
+
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
+        parent=volume_path_spec)
+
+    try:
+      file_system = path_spec_resolver.Resolver.OpenFileSystem(
+          path_spec, resolver_context=self._resolver_context)
+    except IOError as exception:
+      logging.error(
+          u'Unable to open file system with error: {0:s}'.format(exception))
+      return
+
+    try:
+      self._fs_collector.Collect(
+          file_system, path_spec, find_specs=find_specs)
+    except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception:
+      logging.warning(u'{0:s}'.format(exception))
+
+      if find_specs:
+        logging.debug(u'Collection from image with filter FAILED.')
+      else:
+        logging.debug(u'Collection from image FAILED.')
+      return
+
+    if self._abort:
+      return
+
+    if self._vss_stores:
+      self._ProcessVSS(volume_path_spec, find_specs=find_specs)
+
+    if find_specs:
+      logging.debug(u'Collection from image with filter COMPLETED.')
+    else:
+      logging.debug(u'Collection from image COMPLETED.')
+
+  def _ProcessVSS(self, volume_path_spec, find_specs=None):
+    """Processes a VSS volume within a storage media image.
+
+    Args:
+      volume_path_spec: The path specification of the volume containing
+                        the file system.
+      find_specs: Optional list of find specifications (instances of
+                  dfvfs.FindSpec). The default is None.
+    """
+    logging.info(u'Processing VSS.')
+
+    vss_path_spec = path_spec_factory.Factory.NewPathSpec(
+        dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/',
+        parent=volume_path_spec)
+
+    vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
+        vss_path_spec, resolver_context=self._resolver_context)
+
+    number_of_vss = vss_file_entry.number_of_sub_file_entries
+
+    # In plaso, 1 represents the first store index, whereas in dfvfs and
+    # pyvshadow 0 represents the first store index, so 1 is subtracted.
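+    # For example (editorial note, not part of the original code):
+    # vss_stores = [1, 2] results in vss_store_range = [0, 1].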
+ vss_store_range = [store_nr - 1 for store_nr in self._vss_stores] + + for store_index in vss_store_range: + if self._abort: + return + + if find_specs: + logging.info(( + u'Collecting from VSS volume: {0:d} out of: {1:d} ' + u'with filter').format(store_index + 1, number_of_vss)) + else: + logging.info(u'Collecting from VSS volume: {0:d} out of: {1:d}'.format( + store_index + 1, number_of_vss)) + + vss_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index, + parent=volume_path_spec) + path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/', + parent=vss_path_spec) + + file_system = path_spec_resolver.Resolver.OpenFileSystem( + path_spec, resolver_context=self._resolver_context) + + try: + self._fs_collector.Collect( + file_system, path_spec, find_specs=find_specs) + except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception: + logging.warning(u'{0:s}'.format(exception)) + + if find_specs: + logging.debug( + u'Collection from VSS store: {0:d} with filter FAILED.'.format( + store_index + 1)) + else: + logging.debug(u'Collection from VSS store: {0:d} FAILED.'.format( + store_index + 1)) + return + + if find_specs: + logging.debug( + u'Collection from VSS store: {0:d} with filter COMPLETED.'.format( + store_index + 1)) + else: + logging.debug(u'Collection from VSS store: {0:d} COMPLETED.'.format( + store_index + 1)) + + def Collect(self): + """Collects files from the source.""" + source_file_entry = path_spec_resolver.Resolver.OpenFileEntry( + self._source_path_spec, resolver_context=self._resolver_context) + + if not source_file_entry: + logging.warning(u'No files to collect.') + self.SignalEndOfInput() + return + + if (not source_file_entry.IsDirectory() and + not source_file_entry.IsFile() and + not source_file_entry.IsDevice()): + raise errors.CollectorError( + u'Source path: {0:s} not a device, file or directory.'.format( + self._source_path)) + + type_indicator = self._source_path_spec.type_indicator + if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS: + if source_file_entry.IsFile(): + self.ProduceItem(self._source_path_spec) + + else: + file_system = path_spec_resolver.Resolver.OpenFileSystem( + self._source_path_spec, resolver_context=self._resolver_context) + + try: + self._fs_collector.Collect( + file_system, self._source_path_spec, + find_specs=self._filter_find_specs) + except (dfvfs_errors.AccessError, + dfvfs_errors.BackEndError) as exception: + logging.warning(u'{0:s}'.format(exception)) + + else: + self._ProcessImage( + self._source_path_spec.parent, find_specs=self._filter_find_specs) + + self.SignalEndOfInput() + + def SetCollectDirectoryMetadata(self, collect_directory_metadata): + """Sets the collect directory metadata flag. + + Args: + collect_directory_metadata: Boolean value to indicate to collect + directory metadata. + """ + self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata) + + def SetFilter(self, filter_find_specs): + """Sets the collection filter find specifications. + + Args: + filter_find_specs: List of filter find specifications (instances of + dfvfs.FindSpec). + """ + self._filter_find_specs = filter_find_specs + + def SetVssInformation(self, vss_stores): + """Sets the Volume Shadow Snapshots (VSS) information. + + This function will enable VSS collection. + + Args: + vss_stores: The range of VSS stores to include in the collection, + where 1 represents the first store. 
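+
+    Example (editorial sketch, not part of the original code):
+
+      # Enable VSS collection for the first and third store.
+      collector_object.SetVssInformation([1, 3])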
+ """ + self._vss_stores = vss_stores + + def SignalAbort(self): + """Signals the producer to abort.""" + super(Collector, self).SignalAbort() + self._fs_collector.SignalAbort() + + +class FileSystemCollector(queue.ItemQueueProducer): + """Class that implements a file system collector object.""" + + def __init__(self, process_queue): + """Initializes the collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + process_queue: The process queue (instance of Queue). This queue contains + the file entries that need to be processed. + """ + super(FileSystemCollector, self).__init__(process_queue) + self._collect_directory_metadata = True + self._duplicate_file_check = False + self._hashlist = {} + + self.number_of_file_entries = 0 + + def __enter__(self): + """Enters a with statement.""" + return self + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Exits a with statement.""" + return + + def _CalculateNTFSTimeHash(self, file_entry): + """Return a hash value calculated from a NTFS file's metadata. + + Args: + file_entry: The file entry (instance of TSKFileEntry). + + Returns: + A hash value (string) that can be used to determine if a file's timestamp + value has changed. + """ + stat_object = file_entry.GetStat() + ret_hash = hashlib.md5() + + ret_hash.update('atime:{0:d}.{1:d}'.format( + getattr(stat_object, 'atime', 0), + getattr(stat_object, 'atime_nano', 0))) + + ret_hash.update('crtime:{0:d}.{1:d}'.format( + getattr(stat_object, 'crtime', 0), + getattr(stat_object, 'crtime_nano', 0))) + + ret_hash.update('mtime:{0:d}.{1:d}'.format( + getattr(stat_object, 'mtime', 0), + getattr(stat_object, 'mtime_nano', 0))) + + ret_hash.update('ctime:{0:d}.{1:d}'.format( + getattr(stat_object, 'ctime', 0), + getattr(stat_object, 'ctime_nano', 0))) + + return ret_hash.hexdigest() + + def _ProcessDirectory(self, file_entry): + """Processes a directory and extract its metadata if necessary.""" + # Need to do a breadth-first search otherwise we'll hit the Python + # maximum recursion depth. + sub_directories = [] + + for sub_file_entry in file_entry.sub_file_entries: + if self._abort: + return + + try: + if not sub_file_entry.IsAllocated() or sub_file_entry.IsLink(): + continue + except dfvfs_errors.BackEndError as exception: + logging.warning( + u'Unable to process file: {0:s} with error: {1:s}'.format( + sub_file_entry.path_spec.comparable.replace( + u'\n', u';'), exception)) + continue + + # For TSK-based file entries only, ignore the virtual /$OrphanFiles + # directory. + if sub_file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK: + if file_entry.IsRoot() and sub_file_entry.name == u'$OrphanFiles': + continue + + if sub_file_entry.IsDirectory(): + # This check is here to improve performance by not producing + # path specifications that don't get processed. + if self._collect_directory_metadata: + self.ProduceItem(sub_file_entry.path_spec) + self.number_of_file_entries += 1 + + sub_directories.append(sub_file_entry) + + elif sub_file_entry.IsFile(): + # If we are dealing with a VSS we want to calculate a hash + # value based on available timestamps and compare that to previously + # calculated hash values, and only include the file into the queue if + # the hash does not match. 
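+        # For example (editorial note, not part of the original code): the
+        # same unchanged file in two VSS stores yields the same timestamp
+        # hash for the same inode, so the second occurrence is skipped.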
+ if self._duplicate_file_check: + hash_value = self._CalculateNTFSTimeHash(sub_file_entry) + + inode = getattr(sub_file_entry.path_spec, 'inode', 0) + if inode in self._hashlist: + if hash_value in self._hashlist[inode]: + continue + + self._hashlist.setdefault(inode, []).append(hash_value) + + self.ProduceItem(sub_file_entry.path_spec) + self.number_of_file_entries += 1 + + for sub_file_entry in sub_directories: + if self._abort: + return + + try: + self._ProcessDirectory(sub_file_entry) + except (dfvfs_errors.AccessError, dfvfs_errors.BackEndError) as exception: + logging.warning(u'{0:s}'.format(exception)) + + def Collect(self, file_system, path_spec, find_specs=None): + """Collects files from the file system. + + Args: + file_system: The file system (instance of dfvfs.FileSystem). + path_spec: The path specification (instance of dfvfs.PathSpec). + find_specs: Optional list of find specifications (instances of + dfvfs.FindSpec). The default is None. + """ + if find_specs: + searcher = file_system_searcher.FileSystemSearcher(file_system, path_spec) + + for path_spec in searcher.Find(find_specs=find_specs): + if self._abort: + return + + self.ProduceItem(path_spec) + self.number_of_file_entries += 1 + + else: + file_entry = file_system.GetFileEntryByPathSpec(path_spec) + + self._ProcessDirectory(file_entry) + + def SetCollectDirectoryMetadata(self, collect_directory_metadata): + """Sets the collect directory metadata flag. + + Args: + collect_directory_metadata: Boolean value to indicate to collect + directory metadata. + """ + self._collect_directory_metadata = collect_directory_metadata diff --git a/plaso/engine/collector_test.py b/plaso/engine/collector_test.py new file mode 100644 index 0000000..08c14c8 --- /dev/null +++ b/plaso/engine/collector_test.py @@ -0,0 +1,354 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""The unit tests for the generic collector object.""" + +import logging +import os +import shutil +import tempfile +import unittest + +from dfvfs.helpers import file_system_searcher +from dfvfs.lib import definitions as dfvfs_definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import context +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.engine import collector +from plaso.engine import queue +from plaso.engine import single_process +from plaso.engine import utils as engine_utils + + +class TempDirectory(object): + """A self cleaning temporary directory.""" + + def __init__(self): + """Initializes the temporary directory.""" + super(TempDirectory, self).__init__() + self.name = u'' + + def __enter__(self): + """Make this work with the 'with' statement.""" + self.name = tempfile.mkdtemp() + return self.name + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make this work with the 'with' statement.""" + shutil.rmtree(self.name, True) + + +class TestCollectorQueueConsumer(queue.ItemQueueConsumer): + """Class that implements a test collector queue consumer.""" + + def __init__(self, queue_object): + """Initializes the queue consumer. + + Args: + queue_object: the queue object (instance of Queue). + """ + super(TestCollectorQueueConsumer, self).__init__(queue_object) + self.path_specs = [] + + def _ConsumeItem(self, path_spec): + """Consumes an item callback for ConsumeItems. + + Args: + path_spec: a path specification (instance of dfvfs.PathSpec). + """ + self.path_specs.append(path_spec) + + @property + def number_of_path_specs(self): + """The number of path specifications.""" + return len(self.path_specs) + + def GetFilePaths(self): + """Retrieves a list of file paths from the path specifications.""" + file_paths = [] + for path_spec in self.path_specs: + location = getattr(path_spec, 'location', None) + if location is not None: + file_paths.append(location) + return file_paths + + +class CollectorTestCase(unittest.TestCase): + """The collector test case.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + maxDiff = None + + def _GetTestFilePath(self, path_segments): + """Retrieves the path of a test file relative to the test data directory. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A path of the test file. + """ + # Note that we need to pass the individual path segments to os.path.join + # and not a list. 
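+    # For example (editorial note, not part of the original code):
+    # _GetTestFilePath([u'syslog.zip']) returns
+    # os.path.join(self._TEST_DATA_PATH, u'syslog.zip').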
+    return os.path.join(self._TEST_DATA_PATH, *path_segments)
+
+
+class CollectorTest(CollectorTestCase):
+  """Tests for the collector."""
+
+  def testFileSystemCollection(self):
+    """Test collection on the file system."""
+    test_files = [
+        self._GetTestFilePath([u'syslog.tgz']),
+        self._GetTestFilePath([u'syslog.zip']),
+        self._GetTestFilePath([u'syslog.bz2']),
+        self._GetTestFilePath([u'wtmp.1'])]
+
+    with TempDirectory() as dirname:
+      for a_file in test_files:
+        shutil.copy(a_file, dirname)
+
+      path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
+
+      test_collection_queue = single_process.SingleProcessQueue()
+      resolver_context = context.Context()
+      test_collector = collector.Collector(
+          test_collection_queue, dirname, path_spec,
+          resolver_context=resolver_context)
+      test_collector.Collect()
+
+      test_collector_queue_consumer = TestCollectorQueueConsumer(
+          test_collection_queue)
+      test_collector_queue_consumer.ConsumeItems()
+
+      self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
+
+  def testFileSystemWithFilterCollection(self):
+    """Test collection on the file system with a filter."""
+    dirname = u'.'
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname)
+
+    filter_name = ''
+    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+      filter_name = temp_file.name
+      temp_file.write('/test_data/testdir/filter_.+.txt\n')
+      temp_file.write('/test_data/.+evtx\n')
+      temp_file.write('/AUTHORS\n')
+      temp_file.write('/does_not_exist/some_file_[0-9]+txt\n')
+
+    test_collection_queue = single_process.SingleProcessQueue()
+    resolver_context = context.Context()
+    test_collector = collector.Collector(
+        test_collection_queue, dirname, path_spec,
+        resolver_context=resolver_context)
+
+    find_specs = engine_utils.BuildFindSpecsFromFile(filter_name)
+    test_collector.SetFilter(find_specs)
+
+    test_collector.Collect()
+
+    test_collector_queue_consumer = TestCollectorQueueConsumer(
+        test_collection_queue)
+    test_collector_queue_consumer.ConsumeItems()
+
+    try:
+      os.remove(filter_name)
+    except (OSError, IOError) as exception:
+      logging.warning((
+          u'Unable to remove temporary file: {0:s} with error: {1:s}').format(
+              filter_name, exception))
+
+    # Two files with test_data/testdir/filter_*.txt, AUTHORS
+    # and test_data/System.evtx.
+    self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 4)
+
+    paths = test_collector_queue_consumer.GetFilePaths()
+
+    current_directory = os.getcwd()
+
+    expected_path = os.path.join(
+        current_directory, u'test_data', u'testdir', u'filter_1.txt')
+    self.assertTrue(expected_path in paths)
+
+    expected_path = os.path.join(
+        current_directory, u'test_data', u'testdir', u'filter_2.txt')
+    self.assertFalse(expected_path in paths)
+
+    expected_path = os.path.join(
+        current_directory, u'test_data', u'testdir', u'filter_3.txt')
+    self.assertTrue(expected_path in paths)
+
+    expected_path = os.path.join(
+        current_directory, u'AUTHORS')
+    self.assertTrue(expected_path in paths)
+
+  def testImageCollection(self):
+    """Test collection on a storage media image file.
+
+    This image has two files:
+    + logs/hidden.zip
+    + logs/sys.tgz
+
+    The hidden.zip file contains one file, syslog, which is the same file
+    as the one in sys.tgz.
+ + The end results should therefore be: + + logs/hidden.zip (unchanged) + + logs/hidden.zip:syslog (the text file extracted out) + + logs/sys.tgz (unchanged) + + logs/sys.tgz (read as a GZIP file, so not compressed) + + logs/sys.tgz:syslog.gz (A GZIP file from the TAR container) + + logs/sys.tgz:syslog.gz:syslog (the extracted syslog file) + + This means that the collection script should collect 6 files in total. + """ + test_file = self._GetTestFilePath([u'syslog_image.dd']) + + volume_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file) + path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/', + parent=volume_path_spec) + + test_collection_queue = single_process.SingleProcessQueue() + resolver_context = context.Context() + test_collector = collector.Collector( + test_collection_queue, test_file, path_spec, + resolver_context=resolver_context) + test_collector.Collect() + + test_collector_queue_consumer = TestCollectorQueueConsumer( + test_collection_queue) + test_collector_queue_consumer.ConsumeItems() + + self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 3) + + def testImageWithFilterCollection(self): + """Test collection on a storage media image file with a filter.""" + test_file = self._GetTestFilePath([u'ímynd.dd']) + + volume_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_OS, location=test_file) + path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/', + parent=volume_path_spec) + + filter_name = '' + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + filter_name = temp_file.name + temp_file.write('/a_directory/.+zip\n') + temp_file.write('/a_directory/another.+\n') + temp_file.write('/passwords.txt\n') + + test_collection_queue = single_process.SingleProcessQueue() + resolver_context = context.Context() + test_collector = collector.Collector( + test_collection_queue, test_file, path_spec, + resolver_context=resolver_context) + + find_specs = engine_utils.BuildFindSpecsFromFile(filter_name) + test_collector.SetFilter(find_specs) + + test_collector.Collect() + + test_collector_queue_consumer = TestCollectorQueueConsumer( + test_collection_queue) + test_collector_queue_consumer.ConsumeItems() + + try: + os.remove(filter_name) + except (OSError, IOError) as exception: + logging.warning(( + u'Unable to remove temporary file: {0:s} with error: {1:s}').format( + filter_name, exception)) + + self.assertEquals(test_collector_queue_consumer.number_of_path_specs, 2) + + paths = test_collector_queue_consumer.GetFilePaths() + + # path_specs[0] + # type: TSK + # file_path: '/a_directory/another_file' + # container_path: 'test_data/ímynd.dd' + # image_offset: 0 + self.assertEquals(paths[0], u'/a_directory/another_file') + + # path_specs[1] + # type: TSK + # file_path: '/passwords.txt' + # container_path: 'test_data/ímynd.dd' + # image_offset: 0 + self.assertEquals(paths[1], u'/passwords.txt') + + +class BuildFindSpecsFromFileTest(unittest.TestCase): + """Tests for the BuildFindSpecsFromFile function.""" + + def testBuildFindSpecsFromFile(self): + """Tests the BuildFindSpecsFromFile function.""" + filter_name = '' + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + filter_name = temp_file.name + # 2 hits. + temp_file.write('/test_data/testdir/filter_.+.txt\n') + # A single hit. + temp_file.write('/test_data/.+evtx\n') + # A single hit. 
+ temp_file.write('/AUTHORS\n') + temp_file.write('/does_not_exist/some_file_[0-9]+txt\n') + # This should not compile properly, missing file information. + temp_file.write('failing/\n') + # This should not fail during initial loading, but fail later on. + temp_file.write('bad re (no close on that parenthesis/file\n') + + find_specs = engine_utils.BuildFindSpecsFromFile(filter_name) + + try: + os.remove(filter_name) + except (OSError, IOError) as exception: + logging.warning( + u'Unable to remove temporary file: {0:s} with error: {1:s}'.format( + filter_name, exception)) + + self.assertEquals(len(find_specs), 4) + + dirname = u'.' + path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_OS, location=dirname) + file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec) + searcher = file_system_searcher.FileSystemSearcher( + file_system, path_spec) + + path_spec_generator = searcher.Find(find_specs=find_specs) + self.assertNotEquals(path_spec_generator, None) + + path_specs = list(path_spec_generator) + # One evtx, one AUTHORS, two filter_*.txt files, total 4 files. + self.assertEquals(len(path_specs), 4) + + with self.assertRaises(IOError): + _ = engine_utils.BuildFindSpecsFromFile('thisfiledoesnotexist') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/engine/engine.py b/plaso/engine/engine.py new file mode 100644 index 0000000..64ec829 --- /dev/null +++ b/plaso/engine/engine.py @@ -0,0 +1,319 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The processing engine.""" + +import abc +import logging + +from dfvfs.helpers import file_system_searcher +from dfvfs.lib import definitions as dfvfs_definitions +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.artifacts import knowledge_base +from plaso.engine import collector +from plaso.engine import queue +from plaso.lib import errors +from plaso.preprocessors import interface as preprocess_interface +from plaso.preprocessors import manager as preprocess_manager + + +class BaseEngine(object): + """Class that defines the processing engine base.""" + + def __init__(self, collection_queue, storage_queue, parse_error_queue): + """Initialize the engine object. + + Args: + collection_queue: the collection queue object (instance of Queue). + storage_queue: the storage queue object (instance of Queue). + parse_error_queue: the parser error queue object (instance of Queue). 
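+
+    Example (editorial sketch, not part of the original code; MyEngine is a
+    hypothetical subclass that implements CreateExtractionWorker, and the
+    queues use the single process queue from plaso.engine.single_process):
+
+      collection_queue = single_process.SingleProcessQueue()
+      storage_queue = single_process.SingleProcessQueue()
+      parse_error_queue = single_process.SingleProcessQueue()
+      engine_object = MyEngine(
+          collection_queue, storage_queue, parse_error_queue)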
+ """ + self._collection_queue = collection_queue + self._enable_debug_output = False + self._enable_profiling = False + self._event_queue_producer = queue.ItemQueueProducer(storage_queue) + self._filter_object = None + self._mount_path = None + self._open_files = False + self._parse_error_queue = parse_error_queue + self._parse_error_queue_producer = queue.ItemQueueProducer( + parse_error_queue) + self._profiling_sample_rate = 1000 + self._source = None + self._source_path_spec = None + self._source_file_entry = None + self._text_prepend = None + + self.knowledge_base = knowledge_base.KnowledgeBase() + self.storage_queue = storage_queue + + def CreateCollector( + self, include_directory_stat, vss_stores=None, filter_find_specs=None, + resolver_context=None): + """Creates a collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + include_directory_stat: Boolean value to indicate whether directory + stat information should be collected. + vss_stores: Optional list of VSS stores to include in the collection, + where 1 represents the first store. Set to None if no + VSS stores should be processed. The default is None. + filter_find_specs: Optional list of filter find specifications (instances + of dfvfs.FindSpec). The default is None. + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. Note that every thread or process + must have its own resolver context. + + Returns: + A collector object (instance of Collector). + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_path_spec: + raise RuntimeError(u'Missing source.') + + collector_object = collector.Collector( + self._collection_queue, self._source, self._source_path_spec, + resolver_context=resolver_context) + + collector_object.SetCollectDirectoryMetadata(include_directory_stat) + + if vss_stores: + collector_object.SetVssInformation(vss_stores) + + if filter_find_specs: + collector_object.SetFilter(filter_find_specs) + + return collector_object + + @abc.abstractmethod + def CreateExtractionWorker(self, worker_number): + """Creates an extraction worker object. + + Args: + worker_number: A number that identifies the worker. + + Returns: + An extraction worker (instance of worker.ExtractionWorker). + """ + + def GetSourceFileSystemSearcher(self, resolver_context=None): + """Retrieves the file system searcher of the source. + + Args: + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. Note that every thread or process + must have its own resolver context. + + Returns: + The file system searcher object (instance of dfvfs.FileSystemSearcher). + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_path_spec: + raise RuntimeError(u'Missing source.') + + file_system = path_spec_resolver.Resolver.OpenFileSystem( + self._source_path_spec, resolver_context=resolver_context) + + type_indicator = self._source_path_spec.type_indicator + if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS: + mount_point = self._source_path_spec + else: + mount_point = self._source_path_spec.parent + + return file_system_searcher.FileSystemSearcher(file_system, mount_point) + + def PreprocessSource(self, platform, resolver_context=None): + """Preprocesses the source and fills the preprocessing object. 
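+
+    A usage sketch (illustrative; it assumes SetSource has already been
+    called on this engine object):
+
+      engine_object.PreprocessSource(u'Windows')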
+
+    Args:
+      platform: string that indicates the platform (operating system).
+      resolver_context: Optional resolver context (instance of dfvfs.Context).
+                        The default is None. Note that every thread or process
+                        must have its own resolver context.
+    """
+    searcher = self.GetSourceFileSystemSearcher(
+        resolver_context=resolver_context)
+    if not platform:
+      platform = preprocess_interface.GuessOS(searcher)
+    self.knowledge_base.platform = platform
+
+    preprocess_manager.PreprocessPluginsManager.RunPlugins(
+        platform, searcher, self.knowledge_base)
+
+  def SetEnableDebugOutput(self, enable_debug_output):
+    """Enables or disables debug output.
+
+    Args:
+      enable_debug_output: boolean value to indicate if the debug output
+                           should be enabled.
+    """
+    self._enable_debug_output = enable_debug_output
+
+  def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
+    """Enables or disables profiling.
+
+    Args:
+      enable_profiling: boolean value to indicate if the profiling
+                        should be enabled.
+      profiling_sample_rate: optional integer indicating the profiling sample
+                             rate. The value contains the number of files
+                             processed. The default value is 1000.
+    """
+    self._enable_profiling = enable_profiling
+    self._profiling_sample_rate = profiling_sample_rate
+
+  def SetFilterObject(self, filter_object):
+    """Sets the filter object.
+
+    Args:
+      filter_object: the filter object (instance of objectfilter.Filter).
+    """
+    self._filter_object = filter_object
+
+  def SetMountPath(self, mount_path):
+    """Sets the mount path.
+
+    Args:
+      mount_path: string containing the mount path.
+    """
+    self._mount_path = mount_path
+
+  # TODO: rename this mode.
+  def SetOpenFiles(self, open_files):
+    """Sets the open files mode.
+
+    Args:
+      open_files: boolean value to indicate if the worker should scan for
+                  file entries inside files.
+    """
+    self._open_files = open_files
+
+  def SetSource(self, source_path_spec, resolver_context=None):
+    """Sets the source.
+
+    Args:
+      source_path_spec: The source path specification (instance of
+                        dfvfs.PathSpec) as determined by the file system
+                        scanner.
+      resolver_context: Optional resolver context (instance of dfvfs.Context).
+                        The default is None. Note that every thread or process
+                        must have its own resolver context.
+
+    Raises:
+      BadConfigOption: if source cannot be set.
+    """
+    path_spec = source_path_spec
+    while path_spec.parent:
+      path_spec = path_spec.parent
+
+    # Note that source should be used for output purposes only.
+    self._source = getattr(path_spec, 'location', u'')
+    self._source_path_spec = source_path_spec
+
+    self._source_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
+        self._source_path_spec, resolver_context=resolver_context)
+
+    if not self._source_file_entry:
+      raise errors.BadConfigOption(
+          u'No such device, file or directory: {0:s}.'.format(self._source))
+
+    if (not self._source_file_entry.IsDirectory() and
+        not self._source_file_entry.IsFile() and
+        not self._source_file_entry.IsDevice()):
+      raise errors.CollectorError(
+          u'Source path: {0:s} not a device, file or directory.'.format(
+              self._source))
+
+    if self._source_path_spec.type_indicator in [
+        dfvfs_definitions.TYPE_INDICATOR_OS,
+        dfvfs_definitions.TYPE_INDICATOR_FAKE]:
+
+      if self._source_file_entry.IsFile():
+        logging.debug(u'Starting a collection on a single file.')
+        # No need for multiple workers when parsing a single file.
+ + elif not self._source_file_entry.IsDirectory(): + raise errors.BadConfigOption( + u'Source: {0:s} has to be a file or directory.'.format( + self._source)) + + # TODO: remove this functionality. + def SetTextPrepend(self, text_prepend): + """Sets the text prepend. + + Args: + text_prepend: string that contains the text to prepend to every + event object. + """ + self._text_prepend = text_prepend + + def SignalAbort(self): + """Signals the engine to abort.""" + logging.warning(u'Signalled abort.') + self._event_queue_producer.SignalEndOfInput() + self._parse_error_queue_producer.SignalEndOfInput() + + def SignalEndOfInputStorageQueue(self): + """Signals the storage queue no input remains.""" + self._event_queue_producer.SignalEndOfInput() + self._parse_error_queue_producer.SignalEndOfInput() + + def SourceIsDirectory(self): + """Determines if the source is a directory. + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_file_entry: + raise RuntimeError(u'Missing source.') + + return (not self.SourceIsStorageMediaImage() and + self._source_file_entry.IsDirectory()) + + def SourceIsFile(self): + """Determines if the source is a file. + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_file_entry: + raise RuntimeError(u'Missing source.') + + return (not self.SourceIsStorageMediaImage() and + self._source_file_entry.IsFile()) + + def SourceIsStorageMediaImage(self): + """Determines if the source is storage media image file or device. + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_path_spec: + raise RuntimeError(u'Missing source.') + + return self._source_path_spec.type_indicator not in [ + dfvfs_definitions.TYPE_INDICATOR_OS, + dfvfs_definitions.TYPE_INDICATOR_FAKE] diff --git a/plaso/engine/queue.py b/plaso/engine/queue.py new file mode 100644 index 0000000..bd6d7d8 --- /dev/null +++ b/plaso/engine/queue.py @@ -0,0 +1,204 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Queue management implementation for Plaso. + +This file contains an implementation of a queue used by plaso for +queue management. + +The queue has been abstracted in order to provide support for different +implementations of the queueing mechanism, to support multi processing and +scalability. 
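+
+A minimal producer sketch (illustrative; it assumes the single process queue
+implementation from plaso.engine.single_process):
+
+  queue_object = single_process.SingleProcessQueue()
+  producer = ItemQueueProducer(queue_object)
+  producer.ProduceItems([u'item1', u'item2'])
+  producer.SignalEndOfInput()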
+""" + +import abc + +from plaso.lib import errors + + +class QueueEndOfInput(object): + """Class that implements a queue end of input.""" + + +class Queue(object): + """Class that implements the queue interface.""" + + @abc.abstractmethod + def __len__(self): + """Returns the estimated current number of items in the queue.""" + + @abc.abstractmethod + def IsEmpty(self): + """Determines if the queue is empty.""" + + @abc.abstractmethod + def PushItem(self, item): + """Pushes an item onto the queue.""" + + @abc.abstractmethod + def PopItem(self): + """Pops an item off the queue.""" + + def SignalEndOfInput(self): + """Signals the queue no input remains.""" + self.PushItem(QueueEndOfInput()) + + +class QueueConsumer(object): + """Class that implements the queue consumer interface. + + The consumer subscribes to updates on the queue. + """ + + def __init__(self, queue_object): + """Initializes the queue consumer. + + Args: + queue_object: the queue object (instance of Queue). + """ + super(QueueConsumer, self).__init__() + self._abort = False + self._queue = queue_object + + def SignalAbort(self): + """Signals the consumer to abort.""" + self._abort = True + + +class QueueProducer(object): + """Class that implements the queue producer interface. + + The producer generates updates on the queue. + """ + + def __init__(self, queue_object): + """Initializes the queue producer. + + Args: + queue_object: the queue object (instance of Queue). + """ + super(QueueProducer, self).__init__() + self._abort = False + self._queue = queue_object + + def SignalAbort(self): + """Signals the producer to abort.""" + self._abort = True + + def SignalEndOfInput(self): + """Signals the queue no input remains.""" + self._queue.SignalEndOfInput() + + +class EventObjectQueueConsumer(QueueConsumer): + """Class that implements the event object queue consumer. + + The consumer subscribes to updates on the queue. + """ + + @abc.abstractmethod + def _ConsumeEventObject(self, event_object, **kwargs): + """Consumes an event object callback for ConsumeEventObjects.""" + + def ConsumeEventObjects(self, **kwargs): + """Consumes the event object that are pushed on the queue. + + This function will issue a callback to _ConsumeEventObject for every + event object (instance of EventObject) consumed from the queue. + + Args: + kwargs: keyword arguments to pass to the _ConsumeEventObject callback. + """ + while not self._abort: + try: + item = self._queue.PopItem() + except errors.QueueEmpty: + break + + if isinstance(item, QueueEndOfInput): + # Push the item back onto the queue to make sure all + # queue consumers are stopped. + self._queue.PushItem(item) + break + + self._ConsumeEventObject(item, **kwargs) + + self._abort = False + + +class ItemQueueConsumer(QueueConsumer): + """Class that implements an item queue consumer. + + The consumer subscribes to updates on the queue. + """ + + @abc.abstractmethod + def _ConsumeItem(self, item): + """Consumes an item callback for ConsumeItems. + + Args: + item: the item object. + """ + + def ConsumeItems(self): + """Consumes the items that are pushed on the queue.""" + while not self._abort: + try: + item = self._queue.PopItem() + except errors.QueueEmpty: + break + + if isinstance(item, QueueEndOfInput): + # Push the item back onto the queue to make sure all + # queue consumers are stopped. + self._queue.PushItem(item) + break + + self._ConsumeItem(item) + + self._abort = False + + +class ItemQueueProducer(QueueProducer): + """Class that implements an item queue producer. 
+ + The producer generates updates on the queue. + """ + + def _FlushQueue(self): + """Flushes the queue callback for the QueueFull exception.""" + return + + def ProduceItem(self, item): + """Produces an item onto the queue. + + Args: + item: the item object. + """ + try: + self._queue.PushItem(item) + except errors.QueueFull: + self._FlushQueue() + + def ProduceItems(self, items): + """Produces items onto the queue. + + Args: + items: a list or generator of item objects. + """ + for item in items: + self.ProduceItem(item) diff --git a/plaso/engine/single_process.py b/plaso/engine/single_process.py new file mode 100644 index 0000000..bde1c38 --- /dev/null +++ b/plaso/engine/single_process.py @@ -0,0 +1,366 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The single process processing engine.""" + +import collections +import logging +import pdb + +from plaso.engine import collector +from plaso.engine import engine +from plaso.engine import queue +from plaso.engine import worker +from plaso.lib import errors +from plaso.parsers import context as parsers_context + + +class SingleProcessCollector(collector.Collector): + """Class that implements a single process collector object.""" + + def __init__( + self, process_queue, source_path, source_path_spec, + resolver_context=None): + """Initializes the collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + process_queue: The process queue (instance of Queue). This queue contains + the file entries that need to be processed. + source_path: Path of the source file or directory. + source_path_spec: The source path specification (instance of + dfvfs.PathSpec) as determined by the file system + scanner. The default is None. + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. + """ + super(SingleProcessCollector, self).__init__( + process_queue, source_path, source_path_spec, + resolver_context=resolver_context) + + self._extraction_worker = None + self._fs_collector = SingleProcessFileSystemCollector(process_queue) + + def _FlushQueue(self): + """Flushes the queue callback for the QueueFull exception.""" + while not self._queue.IsEmpty(): + logging.debug(u'Extraction worker started.') + self._extraction_worker.Run() + logging.debug(u'Extraction worker stopped.') + + def SetExtractionWorker(self, extraction_worker): + """Sets the extraction worker. + + Args: + extraction_worker: the extraction worker object (instance of + EventExtractionWorker). 
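+
+    Note that once set, the extraction worker doubles as the queue drain:
+    when the collection queue raises QueueFull, _FlushQueue runs this
+    worker until the queue is empty.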
+ """ + self._extraction_worker = extraction_worker + + self._fs_collector.SetExtractionWorker(extraction_worker) + + +class SingleProcessEngine(engine.BaseEngine): + """Class that defines the single process engine.""" + + def __init__(self, maximum_number_of_queued_items=0): + """Initialize the single process engine object. + + Args: + maximum_number_of_queued_items: The maximum number of queued items. + The default is 0, which represents + no limit. + """ + collection_queue = SingleProcessQueue( + maximum_number_of_queued_items=maximum_number_of_queued_items) + storage_queue = SingleProcessQueue( + maximum_number_of_queued_items=maximum_number_of_queued_items) + parse_error_queue = SingleProcessQueue( + maximum_number_of_queued_items=maximum_number_of_queued_items) + + super(SingleProcessEngine, self).__init__( + collection_queue, storage_queue, parse_error_queue) + + self._event_queue_producer = SingleProcessItemQueueProducer(storage_queue) + self._parse_error_queue_producer = SingleProcessItemQueueProducer( + parse_error_queue) + + def CreateCollector( + self, include_directory_stat, vss_stores=None, filter_find_specs=None, + resolver_context=None): + """Creates a collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + include_directory_stat: Boolean value to indicate whether directory + stat information should be collected. + vss_stores: Optional list of VSS stores to include in the collection, + where 1 represents the first store. Set to None if no + VSS stores should be processed. The default is None. + filter_find_specs: Optional list of filter find specifications (instances + of dfvfs.FindSpec). The default is None. + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. Note that every thread or process + must have its own resolver context. + + Returns: + A collector object (instance of Collector). + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_path_spec: + raise RuntimeError(u'Missing source.') + + collector_object = SingleProcessCollector( + self._collection_queue, self._source, self._source_path_spec, + resolver_context=resolver_context) + + collector_object.SetCollectDirectoryMetadata(include_directory_stat) + + if vss_stores: + collector_object.SetVssInformation(vss_stores) + + if filter_find_specs: + collector_object.SetFilter(filter_find_specs) + + return collector_object + + def CreateExtractionWorker(self, worker_number): + """Creates an extraction worker object. + + Args: + worker_number: A number that identifies the worker. + + Returns: + An extraction worker (instance of worker.ExtractionWorker). + """ + parser_context = parsers_context.ParserContext( + self._event_queue_producer, self._parse_error_queue_producer, + self.knowledge_base) + + extraction_worker = SingleProcessEventExtractionWorker( + worker_number, self._collection_queue, self._event_queue_producer, + self._parse_error_queue_producer, parser_context) + + extraction_worker.SetEnableDebugOutput(self._enable_debug_output) + + # TODO: move profiler in separate object. 
+ extraction_worker.SetEnableProfiling( + self._enable_profiling, + profiling_sample_rate=self._profiling_sample_rate) + + if self._open_files: + extraction_worker.SetOpenFiles(self._open_files) + + if self._filter_object: + extraction_worker.SetFilterObject(self._filter_object) + + if self._mount_path: + extraction_worker.SetMountPath(self._mount_path) + + if self._text_prepend: + extraction_worker.SetTextPrepend(self._text_prepend) + + return extraction_worker + + def ProcessSource( + self, collector_object, storage_writer, parser_filter_string=None): + """Processes the source and extracts event objects. + + Args: + collector_object: A collector object (instance of Collector). + storage_writer: A storage writer object (instance of BaseStorageWriter). + parser_filter_string: Optional parser filter string. The default is None. + """ + extraction_worker = self.CreateExtractionWorker(0) + + extraction_worker.InitalizeParserObjects( + parser_filter_string=parser_filter_string) + + # Set the extraction worker and storage writer values so that they + # can be accessed if the QueueFull exception is raised. This is + # needed in single process mode to prevent the queue consuming too + # much memory. + collector_object.SetExtractionWorker(extraction_worker) + self._event_queue_producer.SetStorageWriter(storage_writer) + self._parse_error_queue_producer.SetStorageWriter(storage_writer) + + logging.debug(u'Processing started.') + + logging.debug(u'Collection started.') + collector_object.Collect() + logging.debug(u'Collection stopped.') + + logging.debug(u'Extraction worker started.') + extraction_worker.Run() + logging.debug(u'Extraction worker stopped.') + + self._event_queue_producer.SignalEndOfInput() + + logging.debug(u'Storage writer started.') + storage_writer.WriteEventObjects() + logging.debug(u'Storage writer stopped.') + + # Reset the extraction worker and storage writer values to return + # the objects in their original state. This will prevent access + # to the extraction worker outside this function and allow it + # to be garbage collected. + self._event_queue_producer.SetStorageWriter(None) + self._parse_error_queue_producer.SetStorageWriter(None) + collector_object.SetExtractionWorker(None) + + logging.debug(u'Processing completed.') + + +class SingleProcessEventExtractionWorker(worker.BaseEventExtractionWorker): + """Class that defines the single process event extraction worker.""" + + def _DebugParseFileEntry(self): + """Callback for debugging file entry parsing failures.""" + pdb.post_mortem() + + +class SingleProcessFileSystemCollector(collector.FileSystemCollector): + """Class that implements a single process file system collector object.""" + + def __init__(self, process_queue): + """Initializes the collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + process_queue: The process queue (instance of Queue). This queue contains + the file entries that need to be processed. 
+ """ + super(SingleProcessFileSystemCollector, self).__init__(process_queue) + + self._extraction_worker = None + + def _FlushQueue(self): + """Flushes the queue callback for the QueueFull exception.""" + while not self._queue.IsEmpty(): + logging.debug(u'Extraction worker started.') + self._extraction_worker.Run() + logging.debug(u'Extraction worker stopped.') + + def SetExtractionWorker(self, extraction_worker): + """Sets the extraction worker. + + Args: + extraction_worker: the extraction worker object (instance of + EventExtractionWorker). + """ + self._extraction_worker = extraction_worker + + +class SingleProcessItemQueueProducer(queue.ItemQueueProducer): + """Class that implements a single process item queue producer.""" + + def __init__(self, queue_object): + """Initializes the queue producer. + + Args: + queue_object: the queue object (instance of Queue). + """ + super(SingleProcessItemQueueProducer, self).__init__(queue_object) + + self._storage_writer = None + + def _FlushQueue(self): + """Flushes the queue callback for the QueueFull exception.""" + logging.debug(u'Storage writer started.') + self._storage_writer.WriteEventObjects() + logging.debug(u'Storage writer stopped.') + + def SetStorageWriter(self, storage_writer): + """Sets the storage writer. + + Args: + storage_writer: the storage writer object (instance of + BaseStorageWriter). + """ + self._storage_writer = storage_writer + + +class SingleProcessQueue(queue.Queue): + """Single process queue.""" + + def __init__(self, maximum_number_of_queued_items=0): + """Initializes a single process queue object. + + Args: + maximum_number_of_queued_items: The maximum number of queued items. + The default is 0, which represents + no limit. + """ + super(SingleProcessQueue, self).__init__() + + # The Queue interface defines the maximum number of queued items to be + # 0 if unlimited as does the multi processing queue, but deque uses + # None to indicate no limit. + if maximum_number_of_queued_items == 0: + maximum_number_of_queued_items = None + + # maxlen contains the maximum number of items allowed to be queued, + # where None represents unlimited. + self._queue = collections.deque( + maxlen=maximum_number_of_queued_items) + + def __len__(self): + """Returns the estimated current number of items in the queue.""" + return len(self._queue) + + def IsEmpty(self): + """Determines if the queue is empty.""" + return len(self._queue) == 0 + + def PushItem(self, item): + """Pushes an item onto the queue. + + Raises: + QueueFull: when the queue is full. + """ + number_of_items = len(self._queue) + + # Deque will drop the first item in the queue when maxlen is exceeded. + if not self._queue.maxlen or number_of_items < self._queue.maxlen: + self._queue.append(item) + number_of_items += 1 + + if self._queue.maxlen and number_of_items == self._queue.maxlen: + raise errors.QueueFull + + def PopItem(self): + """Pops an item off the queue. + + Raises: + QueueEmpty: when the queue is empty. + """ + try: + # Using popleft to have FIFO behavior. + return self._queue.popleft() + except IndexError: + raise errors.QueueEmpty diff --git a/plaso/engine/single_process_test.py b/plaso/engine/single_process_test.py new file mode 100644 index 0000000..da3f57c --- /dev/null +++ b/plaso/engine/single_process_test.py @@ -0,0 +1,133 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests the single process processing engine.""" + +import os +import unittest + +from dfvfs.lib import definitions as dfvfs_definitions +from dfvfs.helpers import file_system_searcher +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import context + +from plaso.engine import single_process +from plaso.engine import test_lib +from plaso.lib import errors + + +class SingleProcessQueueTest(unittest.TestCase): + """Tests the single process queue.""" + + _ITEMS = frozenset(['item1', 'item2', 'item3', 'item4']) + + def testPushPopItem(self): + """Tests the PushItem and PopItem functions.""" + test_queue = single_process.SingleProcessQueue() + + for item in self._ITEMS: + test_queue.PushItem(item) + + self.assertEquals(len(test_queue), len(self._ITEMS)) + + test_queue.SignalEndOfInput() + test_queue_consumer = test_lib.TestQueueConsumer(test_queue) + test_queue_consumer.ConsumeItems() + + expected_number_of_items = len(self._ITEMS) + self.assertEquals( + test_queue_consumer.number_of_items, expected_number_of_items) + + def testQueueEmpty(self): + """Tests the queue raises the QueueEmpty exception.""" + test_queue = single_process.SingleProcessQueue() + + with self.assertRaises(errors.QueueEmpty): + test_queue.PopItem() + + def testQueueFull(self): + """Tests the queue raises the QueueFull exception.""" + test_queue = single_process.SingleProcessQueue( + maximum_number_of_queued_items=5) + + for item in self._ITEMS: + test_queue.PushItem(item) + + with self.assertRaises(errors.QueueFull): + test_queue.PushItem('item5') + + with self.assertRaises(errors.QueueFull): + test_queue.PushItem('item6') + + test_queue_consumer = test_lib.TestQueueConsumer(test_queue) + test_queue_consumer.ConsumeItems() + + expected_number_of_items = len(self._ITEMS) + self.assertEquals( + test_queue_consumer.number_of_items, expected_number_of_items + 1) + + +class SingleProcessEngineTest(unittest.TestCase): + """Tests for the engine object.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), u'test_data') + + def testEngine(self): + """Test the engine functionality.""" + resolver_context = context.Context() + test_engine = single_process.SingleProcessEngine( + maximum_number_of_queued_items=25000) + + self.assertNotEquals(test_engine, None) + + source_path = os.path.join(self._TEST_DATA_PATH, u'ímynd.dd') + os_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_OS, location=source_path) + source_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/', + parent=os_path_spec) + + test_engine.SetSource(source_path_spec, resolver_context=resolver_context) + + self.assertFalse(test_engine.SourceIsDirectory()) + self.assertFalse(test_engine.SourceIsFile()) + self.assertTrue(test_engine.SourceIsStorageMediaImage()) + + test_searcher = test_engine.GetSourceFileSystemSearcher( + resolver_context=resolver_context) + self.assertNotEquals(test_searcher, None) + 
self.assertIsInstance( + test_searcher, file_system_searcher.FileSystemSearcher) + + test_engine.PreprocessSource('Windows') + + test_collector = test_engine.CreateCollector( + False, vss_stores=None, filter_find_specs=None, + resolver_context=resolver_context) + self.assertNotEquals(test_collector, None) + self.assertIsInstance( + test_collector, single_process.SingleProcessCollector) + + test_extraction_worker = test_engine.CreateExtractionWorker(0) + self.assertNotEquals(test_extraction_worker, None) + self.assertIsInstance( + test_extraction_worker, + single_process.SingleProcessEventExtractionWorker) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/engine/test_lib.py b/plaso/engine/test_lib.py new file mode 100644 index 0000000..26035ea --- /dev/null +++ b/plaso/engine/test_lib.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Engine related functions and classes for testing.""" + +import os +import unittest + +from plaso.engine import queue + + +class TestQueueConsumer(queue.ItemQueueConsumer): + """Class that implements the test queue consumer. + + The queue consumer subscribes to updates on the queue. + """ + + def __init__(self, test_queue): + """Initializes the queue consumer. + + Args: + test_queue: the test queue (instance of Queue). + """ + super(TestQueueConsumer, self).__init__(test_queue) + self.items = [] + + def _ConsumeItem(self, item): + """Consumes an item callback for ConsumeItems.""" + self.items.append(item) + + @property + def number_of_items(self): + """The number of items.""" + return len(self.items) + + +class EngineTestCase(unittest.TestCase): + """The unit test case for a front-end.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + maxDiff = None + + def _GetTestFilePath(self, path_segments): + """Retrieves the path of a test file relative to the test data directory. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A path of the test file. + """ + # Note that we need to pass the individual path segments to os.path.join + # and not a list. + return os.path.join(self._TEST_DATA_PATH, *path_segments) diff --git a/plaso/engine/utils.py b/plaso/engine/utils.py new file mode 100644 index 0000000..1d60a55 --- /dev/null +++ b/plaso/engine/utils.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Engine utility functions."""
+
+import logging
+
+from dfvfs.helpers import file_system_searcher
+
+from plaso.winreg import path_expander
+
+
+def BuildFindSpecsFromFile(filter_file_path, pre_obj=None):
+  """Returns a list of find specifications from a filter file.
+
+  Args:
+    filter_file_path: A path to a file that contains find specifications.
+    pre_obj: A preprocessing object (instance of PreprocessObject). This is
+             optional but when provided takes care of expanding each segment.
+
+  Returns:
+    A list of find specifications (instances of dfvfs.FindSpec).
+  """
+  find_specs = []
+
+  if pre_obj:
+    expander = path_expander.WinRegistryKeyPathExpander()
+
+  with open(filter_file_path, 'rb') as file_object:
+    for line in file_object:
+      line = line.strip()
+      if line.startswith(u'#'):
+        continue
+
+      if pre_obj:
+        try:
+          line = expander.ExpandPath(line, pre_obj=pre_obj)
+        except KeyError as exception:
+          logging.error((
+              u'Unable to use collection filter line: {0:s} with error: '
+              u'{1:s}').format(line, exception))
+          continue
+
+      if not line.startswith(u'/'):
+        logging.warning((
+            u'The filter string must be defined as an absolute path: '
+            u'{0:s}').format(line))
+        continue
+
+      _, _, file_path = line.rstrip().rpartition(u'/')
+      if not file_path:
+        logging.warning(
+            u'Unable to parse the filter string: {0:s}'.format(line))
+        continue
+
+      # Convert the filter paths into a list of path segments and strip
+      # the root path segment.
+      path_segments = line.split(u'/')
+      path_segments.pop(0)
+
+      find_specs.append(file_system_searcher.FindSpec(
+          location_regex=path_segments, case_sensitive=False))
+
+  return find_specs
diff --git a/plaso/engine/worker.py b/plaso/engine/worker.py
new file mode 100644
index 0000000..8eae3a1
--- /dev/null
+++ b/plaso/engine/worker.py
@@ -0,0 +1,352 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The event extraction worker."""
+
+import logging
+import os
+
+from dfvfs.resolver import context
+from dfvfs.resolver import resolver as path_spec_resolver
+
+try:
+  from guppy import hpy
+except ImportError:
+  hpy = None
+
+from plaso.engine import classifier
+from plaso.engine import queue
+from plaso.lib import errors
+from plaso.parsers import manager as parsers_manager
+
+
+class BaseEventExtractionWorker(queue.ItemQueueConsumer):
+  """Class that defines the event extraction worker base.
+
+  This class is designed to watch a queue for path specifications of files
+  and directories (file entries) for which events need to be extracted.
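+
+  A minimal driving sequence (sketch, mirroring what
+  SingleProcessEngine.ProcessSource in plaso.engine.single_process does):
+
+    extraction_worker = engine_object.CreateExtractionWorker(0)
+    extraction_worker.InitalizeParserObjects()
+    extraction_worker.Run()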
+ + The event extraction worker needs to determine if a parser suitable + for parsing a particular file is available. All extracted event objects + are pushed on a storage queue for further processing. + """ + + def __init__( + self, identifier, process_queue, event_queue_producer, + parse_error_queue_producer, parser_context): + """Initializes the event extraction worker object. + + Args: + identifier: The identifier, usually an incrementing integer. + process_queue: The process queue (instance of Queue). This queue contains + the file entries that need to be processed. + event_queue_producer: The event object queue producer (instance of + ItemQueueProducer). + parse_error_queue_producer: The parse error queue producer (instance of + ItemQueueProducer). + parser_context: A parser context object (instance of ParserContext). + """ + super(BaseEventExtractionWorker, self).__init__(process_queue) + self._enable_debug_output = False + self._identifier = identifier + self._open_files = False + self._parser_context = parser_context + self._filestat_parser_object = None + self._parser_objects = None + + # We need a resolver context per process to prevent multi processing + # issues with file objects stored in images. + self._resolver_context = context.Context() + self._event_queue_producer = event_queue_producer + self._parse_error_queue_producer = parse_error_queue_producer + + # Attributes that contain the current status of the worker. + self._current_working_file = u'' + self._is_running = False + + # Attributes for profiling. + self._enable_profiling = False + self._heapy = None + self._profiling_sample = 0 + self._profiling_sample_rate = 1000 + self._profiling_sample_file = u'{0!s}.hpy'.format(self._identifier) + + def _ConsumeItem(self, path_spec): + """Consumes an item callback for ConsumeItems. + + Args: + path_spec: a path specification (instance of dfvfs.PathSpec). + """ + file_entry = path_spec_resolver.Resolver.OpenFileEntry( + path_spec, resolver_context=self._resolver_context) + + if file_entry is None: + logging.warning(u'Unable to open file entry: {0:s}'.format( + path_spec.comparable)) + return + + try: + self.ParseFileEntry(file_entry) + except IOError as exception: + logging.warning(u'Unable to parse file: {0:s} with error: {1:s}'.format( + path_spec.comparable, exception)) + + def _DebugParseFileEntry(self): + """Callback for debugging file entry parsing failures.""" + return + + def _ParseFileEntryWithParser(self, parser_object, file_entry): + """Parses a file entry with a specific parser. + + Args: + parser_object: A parser object (instance of BaseParser). + file_entry: A file entry object (instance of dfvfs.FileEntry). + + Raises: + QueueFull: If a queue is full. + """ + try: + parser_object.Parse(self._parser_context, file_entry) + + except errors.UnableToParseFile as exception: + logging.debug(u'Not a {0:s} file ({1:s}) - {2:s}'.format( + parser_object.NAME, file_entry.name, exception)) + + except errors.QueueFull: + raise + + except IOError as exception: + logging.debug( + u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format( + parser_object.NAME, file_entry.path_spec.comparable, + exception)) + + # Casting a wide net, catching all exceptions. Done to keep the worker + # running, despite the parser hitting errors, so the worker doesn't die + # if a single file is corrupted or there is a bug in a parser. 
+ except Exception as exception: + logging.warning( + u'[{0:s}] Unable to process file: {1:s} with error: {2:s}.'.format( + parser_object.NAME, file_entry.path_spec.comparable, + exception)) + logging.debug( + u'The path specification that caused the error: {0:s}'.format( + file_entry.path_spec.comparable)) + logging.exception(exception) + + if self._enable_debug_output: + self._DebugParseFileEntry() + + def _ProfilingStart(self): + """Starts the profiling.""" + self._heapy.setrelheap() + self._profiling_sample = 0 + + try: + os.remove(self._profiling_sample_file) + except OSError: + pass + + def _ProfilingStop(self): + """Stops the profiling.""" + self._ProfilingWriteSample() + + def _ProfilingUpdate(self): + """Updates the profiling.""" + self._profiling_sample += 1 + + if self._profiling_sample >= self._profiling_sample_rate: + self._ProfilingWriteSample() + self._profiling_sample = 0 + + def _ProfilingWriteSample(self): + """Writes a profiling sample to the sample file.""" + heap = self._heapy.heap() + heap.dump(self._profiling_sample_file) + + def GetStatus(self): + """Returns a status dictionary.""" + return { + 'is_running': self._is_running, + 'identifier': u'Worker_{0:d}'.format(self._identifier), + 'current_file': self._current_working_file, + 'counter': self._parser_context.number_of_events} + + def InitalizeParserObjects(self, parser_filter_string=None): + """Initializes the parser objects. + + The parser_filter_string is a simple comma separated value string that + denotes a list of parser names to include and/or exclude. Each entry + can have the value of: + + Exact match of a list of parsers, or a preset (see + plaso/frontend/presets.py for a full list of available presets). + + A name of a single parser (case insensitive), eg. msiecfparser. + + A glob name for a single parser, eg: '*msie*' (case insensitive). + + Args: + parser_filter_string: Optional parser filter string. The default is None. + """ + self._parser_objects = parsers_manager.ParsersManager.GetParserObjects( + parser_filter_string=parser_filter_string) + + for parser_object in self._parser_objects: + if parser_object.NAME == 'filestat': + self._filestat_parser_object = parser_object + break + + def ParseFileEntry(self, file_entry): + """Parses a file entry. + + Args: + file_entry: A file entry object (instance of dfvfs.FileEntry). + """ + logging.debug(u'[ParseFileEntry] Parsing: {0:s}'.format( + file_entry.path_spec.comparable)) + + self._current_working_file = getattr( + file_entry.path_spec, u'location', file_entry.name) + + if file_entry.IsDirectory() and self._filestat_parser_object: + self._ParseFileEntryWithParser(self._filestat_parser_object, file_entry) + + elif file_entry.IsFile(): + # TODO: Not go through all parsers, just the ones + # that the classifier classifies the file as. 
+
+      for parser_object in self._parser_objects:
+        logging.debug(u'Trying to parse: {0:s} with parser: {1:s}'.format(
+            file_entry.name, parser_object.NAME))
+
+        self._ParseFileEntryWithParser(parser_object, file_entry)
+
+    logging.debug(u'[ParseFileEntry] Done parsing: {0:s}'.format(
+        file_entry.path_spec.comparable))
+
+    if self._enable_profiling:
+      self._ProfilingUpdate()
+
+    if self._open_files:
+      try:
+        for sub_file_entry in classifier.Classifier.SmartOpenFiles(file_entry):
+          if self._abort:
+            break
+
+          self.ParseFileEntry(sub_file_entry)
+
+      except IOError as exception:
+        logging.warning(
+            u'Unable to parse file: {0:s} with error: {1:s}'.format(
+                file_entry.path_spec.comparable, exception))
+
+  def Run(self):
+    """Extracts event objects from file entries."""
+    self._parser_context.ResetCounters()
+
+    if self._enable_profiling:
+      self._ProfilingStart()
+
+    self._is_running = True
+
+    logging.info(
+        u'Worker {0:d} (PID: {1:d}) started monitoring process queue.'.format(
+            self._identifier, os.getpid()))
+
+    self.ConsumeItems()
+
+    logging.info(
+        u'Worker {0:d} (PID: {1:d}) stopped monitoring process queue.'.format(
+            self._identifier, os.getpid()))
+
+    self._current_working_file = u''
+
+    self._is_running = False
+
+    if self._enable_profiling:
+      self._ProfilingStop()
+
+    self._resolver_context.Empty()
+
+  def SetEnableDebugOutput(self, enable_debug_output):
+    """Enables or disables debug output.
+
+    Args:
+      enable_debug_output: boolean value to indicate if the debug output
+                           should be enabled.
+    """
+    self._enable_debug_output = enable_debug_output
+
+  def SetEnableProfiling(self, enable_profiling, profiling_sample_rate=1000):
+    """Enables or disables profiling.
+
+    Args:
+      enable_profiling: boolean value to indicate if the profiling
+                        should be enabled.
+      profiling_sample_rate: optional integer indicating the profiling sample
+                             rate. The value contains the number of files
+                             processed. The default value is 1000.
+    """
+    if hpy:
+      self._enable_profiling = enable_profiling
+      self._profiling_sample_rate = profiling_sample_rate
+
+      if self._enable_profiling and not self._heapy:
+        self._heapy = hpy()
+
+  def SetFilterObject(self, filter_object):
+    """Sets the filter object.
+
+    Args:
+      filter_object: the filter object (instance of objectfilter.Filter).
+    """
+    self._parser_context.SetFilterObject(filter_object)
+
+  def SetMountPath(self, mount_path):
+    """Sets the mount path.
+
+    Args:
+      mount_path: string containing the mount path.
+    """
+    self._parser_context.SetMountPath(mount_path)
+
+  # TODO: rename this mode.
+  def SetOpenFiles(self, open_files):
+    """Sets the open files mode.
+
+    Args:
+      open_files: boolean value to indicate if the worker should scan for
+                  file entries inside files.
+    """
+    self._open_files = open_files
+
+  def SetTextPrepend(self, text_prepend):
+    """Sets the text prepend.
+
+    Args:
+      text_prepend: string that contains the text to prepend to every
+                    event object.
+ """ + self._parser_context.SetTextPrepend(text_prepend) + + def SignalAbort(self): + """Signals the worker to abort.""" + super(BaseEventExtractionWorker, self).SignalAbort() + self._parser_context.SignalAbort() + + @classmethod + def SupportsProfiling(cls): + """Returns a boolean value to indicate if profiling is supported.""" + return hpy is not None diff --git a/plaso/events/__init__.py b/plaso/events/__init__.py new file mode 100644 index 0000000..f4a69a4 --- /dev/null +++ b/plaso/events/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/events/plist_event.py b/plaso/events/plist_event.py new file mode 100644 index 0000000..3641f3a --- /dev/null +++ b/plaso/events/plist_event.py @@ -0,0 +1,92 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file is the template for Plist events.""" + +from plaso.events import time_events +from plaso.lib import eventdata + + +class PlistEvent(time_events.PythonDatetimeEvent): + """Convenience class for a plist events.""" + + DATA_TYPE = 'plist:key' + + def __init__(self, root, key, timestamp, desc=None, host=None, user=None): + """Template for creating a Plist EventObject for returning data to Plaso. + + All events extracted from files get passed around Plaso internally as an + EventObject. PlistEvent is an EventObject with attributes specifically + relevant to data extracted from a Plist file. The attribute DATA_TYPE + 'plist:key' allows the formatter used during output to identify + the appropriate formatter for converting these attributes to output. + + Args: + root: A string representing the path from the root to this key. + key: A string representing the name of key. + timestamp: The date object (instance of datetime.datetime). + desc: An optional string intended for the user describing the event. + host: An optional host name if one is available within the log file. + user: An optional user name if one is available within the log file. 
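+
+    A usage sketch (illustrative values; datetime_value is assumed to be a
+    datetime.datetime object):
+
+      event_object = PlistEvent(
+          u'/DeviceCache', u'LastInquiryUpdate', datetime_value,
+          desc=u'Last time the device was seen.')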
+    """
+    super(PlistEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
+
+    self.root = root
+    self.key = key
+    if desc:
+      self.desc = desc
+    if host:
+      self.hostname = host
+    if user:
+      self.username = user
+
+
+class PlistTimeEvent(time_events.TimestampEvent):
+  """Convenience class for a plist event that does not use datetime objects."""
+
+  DATA_TYPE = 'plist:key'
+
+  def __init__(self, root, key, timestamp, desc=None, host=None, user=None):
+    """Template for creating a Plist EventObject for returning data to Plaso.
+
+    All events extracted from files get passed around Plaso internally as an
+    EventObject. PlistTimeEvent is an EventObject with attributes specifically
+    relevant to data extracted from a Plist file. The attribute DATA_TYPE
+    'plist:key' allows the formatter used during output to identify
+    the appropriate formatter for converting these attributes to output.
+
+    Args:
+      root: A string representing the path from the root to this key.
+      key: A string representing the name of the key.
+      timestamp: The timestamp time value. The timestamp contains the
+                 number of microseconds since Jan 1, 1970 00:00:00 UTC.
+      desc: An optional string intended for the user describing the event.
+      host: An optional host name if one is available within the log file.
+      user: An optional user name if one is available within the log file.
+    """
+    super(PlistTimeEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
+
+    self.root = root
+    self.key = key
+    if desc:
+      self.desc = desc
+    if host:
+      self.hostname = host
+    if user:
+      self.username = user
diff --git a/plaso/events/shell_item_events.py b/plaso/events/shell_item_events.py
new file mode 100644
index 0000000..82de6b3
--- /dev/null
+++ b/plaso/events/shell_item_events.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the shell item specific event object classes."""
+
+from plaso.events import time_events
+
+
+class ShellItemFileEntryEvent(time_events.FatDateTimeEvent):
+  """Convenience class for a shell item file entry event."""
+
+  DATA_TYPE = 'windows:shell_item:file_entry'
+
+  def __init__(
+      self, fat_date_time, usage, name, long_name, localized_name,
+      file_reference, origin):
+    """Initializes an event object.
+
+    Args:
+      fat_date_time: The FAT date time value.
+      usage: The description of the usage of the time value.
+      name: A string containing the name of the file entry shell item.
+      long_name: A string containing the long name of the file entry shell
+                 item.
+      localized_name: A string containing the localized name of the file entry
+                      shell item.
+      file_reference: A string containing the NTFS file reference
+                      (MFT entry - sequence number).
+      origin: A string containing the origin of the event (event source).
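+
+    A usage sketch (illustrative values; eventdata refers to the
+    plaso.lib.eventdata module, which this file does not import):
+
+      event_object = ShellItemFileEntryEvent(
+          fat_date_time, eventdata.EventTimestamp.MODIFICATION_TIME,
+          u'PROGRA~1', u'Program Files', u'', u'4726-1', u'lnk')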
+ """ + super(ShellItemFileEntryEvent, self).__init__(fat_date_time, usage) + + self.name = name + self.long_name = long_name + self.localized_name = localized_name + self.file_reference = file_reference + self.origin = origin diff --git a/plaso/events/text_events.py b/plaso/events/text_events.py new file mode 100644 index 0000000..f40b7eb --- /dev/null +++ b/plaso/events/text_events.py @@ -0,0 +1,48 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the text format specific event object classes.""" + +from plaso.events import time_events +from plaso.lib import eventdata + + +class TextEvent(time_events.TimestampEvent): + """Convenience class for a text format-based event.""" + + DATA_TYPE = 'text:entry' + + def __init__(self, timestamp, offset, attributes): + """Initializes a text event object. + + Args: + timestamp: The timestamp time value. The timestamp contains the + number of microseconds since Jan 1, 1970 00:00:00 UTC. + offset: The offset of the attributes. + attributes: A dict that contains the events attributes. + """ + super(TextEvent, self).__init__( + timestamp, eventdata.EventTimestamp.WRITTEN_TIME) + + self.offset = offset + + for name, value in attributes.iteritems(): + # TODO: Revisit this constraints and see if we can implement + # it using a more sane solution. + if isinstance(value, basestring) and not value: + continue + setattr(self, name, value) diff --git a/plaso/events/time_events.py b/plaso/events/time_events.py new file mode 100644 index 0000000..dddb39c --- /dev/null +++ b/plaso/events/time_events.py @@ -0,0 +1,157 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the time-based event object classes.""" + +from plaso.lib import event +from plaso.lib import timelib + + +class TimestampEvent(event.EventObject): + """Convenience class for a timestamp-based event.""" + + def __init__(self, timestamp, usage, data_type=None): + """Initializes an event object. + + Args: + timestamp: The timestamp value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. 
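+
+    A subclassing sketch (illustrative; it mirrors the convenience
+    subclasses defined below):
+
+      class MyTimestampEvent(TimestampEvent):
+        DATA_TYPE = 'my:timestamp:entry'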
+ """ + super(TimestampEvent, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = usage + + if data_type: + self.data_type = data_type + + +class CocoaTimeEvent(TimestampEvent): + """Convenience class for a Cocoa time-based event.""" + + def __init__(self, cocoa_time, usage, data_type=None): + """Initializes an event object. + + Args: + cocoa_time: The Cocoa time value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(CocoaTimeEvent, self).__init__( + timelib.Timestamp.FromCocoaTime(cocoa_time), usage, + data_type=data_type) + + +class FatDateTimeEvent(TimestampEvent): + """Convenience class for a FAT date time-based event.""" + + def __init__(self, fat_date_time, usage, data_type=None): + """Initializes an event object. + + Args: + fat_date_time: The FAT date time value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(FatDateTimeEvent, self).__init__( + timelib.Timestamp.FromFatDateTime(fat_date_time), usage, + data_type=data_type) + + +class FiletimeEvent(TimestampEvent): + """Convenience class for a FILETIME timestamp-based event.""" + + def __init__(self, filetime, usage, data_type=None): + """Initializes an event object. + + Args: + filetime: The FILETIME timestamp value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(FiletimeEvent, self).__init__( + timelib.Timestamp.FromFiletime(filetime), usage, data_type=data_type) + + +class JavaTimeEvent(TimestampEvent): + """Convenience class for a Java time-based event.""" + + def __init__(self, java_time, usage, data_type=None): + """Initializes an event object. + + Args: + java_time: The Java time value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(JavaTimeEvent, self).__init__( + timelib.Timestamp.FromJavaTime(java_time), usage, data_type=data_type) + + +class PosixTimeEvent(TimestampEvent): + """Convenience class for a POSIX time-based event.""" + + def __init__(self, posix_time, usage, data_type=None): + """Initializes an event object. + + Args: + posix_time: The POSIX time value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(PosixTimeEvent, self).__init__( + timelib.Timestamp.FromPosixTime(posix_time), usage, data_type=data_type) + + +class PythonDatetimeEvent(TimestampEvent): + """Convenience class for a Python DateTime time-based event.""" + + def __init__(self, datetime_time, usage, data_type=None): + """Initializes an event object. + + Args: + datetime_time: The datetime object (instance of datetime.datetime). + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(PythonDatetimeEvent, self).__init__( + timelib.Timestamp.FromPythonDatetime(datetime_time), usage, + data_type=data_type) + + +class WebKitTimeEvent(TimestampEvent): + """Convenience class for a WebKit time-based event.""" + + def __init__(self, webkit_time, usage, data_type=None): + """Initializes an event object. 
+ + Args: + webkit_time: The WebKit time value. + usage: The description of the usage of the time value. + data_type: Optional event data type. If not set data_type is + derived from the DATA_TYPE attribute. + """ + super(WebKitTimeEvent, self).__init__( + timelib.Timestamp.FromWebKitTime(webkit_time), usage, + data_type=data_type) diff --git a/plaso/events/windows_events.py b/plaso/events/windows_events.py new file mode 100644 index 0000000..77cc397 --- /dev/null +++ b/plaso/events/windows_events.py @@ -0,0 +1,95 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Windows specific event object classes.""" + +from plaso.events import time_events +from plaso.lib import eventdata + + +class WindowsVolumeCreationEvent(time_events.FiletimeEvent): + """Convenience class for a Windows volume creation event.""" + + DATA_TYPE = 'windows:volume:creation' + + def __init__(self, filetime, device_path, serial_number, origin): + """Initializes an event object. + + Args: + filetime: The FILETIME timestamp value. + device_path: A string containing the volume device path. + serial_number: A string containing the volume serial number. + origin: A string containing the origin of the event (event source). + """ + super(WindowsVolumeCreationEvent, self).__init__( + filetime, eventdata.EventTimestamp.CREATION_TIME) + + self.device_path = device_path + self.serial_number = serial_number + self.origin = origin + + +class WindowsRegistryEvent(time_events.TimestampEvent): + """Convenience class for a Windows Registry-based event.""" + + DATA_TYPE = 'windows:registry:key_value' + + def __init__( + self, timestamp, key_name, value_dict, usage=None, offset=None, + registry_type=None, urls=None, source_append=None): + """Initializes a Windows registry event. + + Args: + timestamp: The timestamp time value. The timestamp contains the + number of microseconds since Jan 1, 1970 00:00:00 UTC. + key_name: The name of the Registry key being parsed. + value_dict: The interpreted value of the key, stored as a dictionary. + usage: Optional description of the usage of the time value. + The default is None. + offset: Optional (data) offset of the Registry key or value. + The default is None. + registry_type: Optional Registry type string. The default is None. + urls: Optional list of URLs. The default is None. + source_append: Optional string to append to the source_long of the event. + The default is None. 
+ """ + if usage is None: + usage = eventdata.EventTimestamp.WRITTEN_TIME + + super(WindowsRegistryEvent, self).__init__(timestamp, usage) + + if key_name: + self.keyname = key_name + + self.regvalue = value_dict + + if offset or type(offset) in [int, long]: + self.offset = offset + + if registry_type: + self.registry_type = registry_type + + if urls: + self.url = u' - '.join(urls) + + if source_append: + self.source_append = source_append + + +class WindowsRegistryServiceEvent(WindowsRegistryEvent): + """Convenience class for service entries retrieved from the registry.""" + DATA_TYPE = 'windows:registry:service' diff --git a/plaso/filters/__init__.py b/plaso/filters/__init__.py new file mode 100644 index 0000000..e2e5b40 --- /dev/null +++ b/plaso/filters/__init__.py @@ -0,0 +1,56 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each filter.""" +import logging + +from plaso.filters import dynamic_filter +from plaso.filters import eventfilter +from plaso.filters import filterlist + +from plaso.lib import filter_interface +from plaso.lib import errors + + +def ListFilters(): + """Generate a list of all available filters.""" + filters = [] + for cl in filter_interface.FilterObject.classes: + filters.append(filter_interface.FilterObject.classes[cl]()) + + return filters + + +def GetFilter(filter_string): + """Returns the first filter that matches the filter string. + + Args: + filter_string: A filter string for any of the available filters. + + Returns: + The first FilterObject found matching the filter string. If no FilterObject + is available for this filter string None is returned. + """ + if not filter_string: + return + + for filter_obj in ListFilters(): + try: + filter_obj.CompileFilter(filter_string) + return filter_obj + except errors.WrongPlugin: + logging.debug(u'Filterstring [{}] is not a filter: {}'.format( + filter_string, filter_obj.filter_name)) diff --git a/plaso/filters/dynamic_filter.py b/plaso/filters/dynamic_filter.py new file mode 100644 index 0000000..972aa2e --- /dev/null +++ b/plaso/filters/dynamic_filter.py @@ -0,0 +1,162 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains definition for a selective fields EventObjectFilter.""" +from plaso.lib import errors +from plaso.lib import lexer +from plaso.filters import eventfilter + + +class SelectiveLexer(lexer.Lexer): + """A simple selective filter lexer implementation.""" + + tokens = [ + lexer.Token('INITIAL', r'SELECT', '', 'FIELDS'), + lexer.Token('FIELDS', r'(.+) WHERE ', 'SetFields', 'FILTER'), + lexer.Token('FIELDS', r'(.+) LIMIT', 'SetFields', 'LIMIT_END'), + lexer.Token('FIELDS', r'(.+) SEPARATED BY', 'SetFields', 'SEPARATE'), + lexer.Token('FIELDS', r'(.+)$', 'SetFields', 'END'), + lexer.Token('FILTER', r'(.+) SEPARATED BY', 'SetFilter', 'SEPARATE'), + lexer.Token('FILTER', r'(.+) LIMIT', 'SetFilter', 'LIMIT_END'), + lexer.Token('FILTER', r'(.+)$', 'SetFilter', 'END'), + lexer.Token('SEPARATE', r' ', '', ''), # Ignore white space here. + lexer.Token('SEPARATE', r'LIMIT', '', 'LIMIT_END'), + lexer.Token( + 'SEPARATE', r'[\'"]([^ \'"]+)[\'"] LIMIT', 'SetSeparator', + 'LIMIT_END'), + lexer.Token( + 'SEPARATE', r'[\'"]([^ \'"]+)[\'"]$', 'SetSeparator', 'END'), + lexer.Token( + 'SEPARATE', r'(.+)$', 'SetSeparator', 'END'), + lexer.Token( + 'LIMIT_END', r'SEPARATED BY [\'"]([^\'"]+)[\'"]', 'SetSeparator', ''), + lexer.Token('LIMIT_END', r'(.+) SEPARATED BY', 'SetLimit', 'SEPARATE'), + lexer.Token('LIMIT_END', r'(.+)$', 'SetLimit', 'END')] + + def __init__(self, data=''): + """Initialize the lexer.""" + self.fields = [] + self.limit = 0 + self.lex_filter = None + self.separator = u',' + super(SelectiveLexer, self).__init__(data) + + def SetFilter(self, match, **_): + """Set the filter query.""" + filter_match = match.group(1) + if 'LIMIT' in filter_match: + # This only occurs in the case where we have "LIMIT X SEPARATED BY". + self.lex_filter, _, push_back = filter_match.rpartition('LIMIT') + self.PushBack('LIMIT {} SEPARATED BY '.format(push_back)) + else: + self.lex_filter = filter_match + + def SetSeparator(self, match, **_): + """Set the separator of the output, only uses the first char.""" + separator = match.group(1) + if separator: + self.separator = separator[0] + + def SetLimit(self, match, **_): + """Set the row limit.""" + try: + limit = int(match.group(1)) + except ValueError: + self.Error('Invalid limit value, should be int [{}] = {}'.format( + type(match.group(1)), match.group(1))) + limit = 0 + + self.limit = limit + + def SetFields(self, match, **_): + """Set the selective fields.""" + text = match.group(1).lower() + field_text, _, _ = text.partition(' from ') + + use_field_text = field_text.replace(' ', '') + if ',' in use_field_text: + self.fields = use_field_text.split(',') + else: + self.fields = [use_field_text] + + +class DynamicFilter(eventfilter.EventObjectFilter): + """A twist to the EventObjectFilter allowing output fields to be selected. + + This filter is essentially the same as the EventObjectFilter except it wraps + it in a selection of which fields should be included by an output module that + has support for selective fields. That is to say the filter: + + SELECT field_a, field_b WHERE attribute contains 'text' + + Will use the EventObjectFilter "attribute contains 'text'" and at the same + time indicate to the appropriate output module that the user wants only the + fields field_a and field_b to be used in the output. 
+ """ + + @property + def fields(self): + """Set the fields property.""" + return self._fields + + @property + def limit(self): + """Return the limit of row counts.""" + return self._limit + + @property + def separator(self): + """Return the separator value.""" + return self._separator + + def __init__(self): + """Initialize the selective EventObjectFilter.""" + super(DynamicFilter, self).__init__() + self._fields = [] + self._limit = 0 + self._separator = u',' + + def CompileFilter(self, filter_string): + """Compile the filter string into a EventObjectFilter matcher.""" + lex = SelectiveLexer(filter_string) + + _ = lex.NextToken() + if lex.error: + raise errors.WrongPlugin('Malformed filter string.') + + _ = lex.NextToken() + if lex.error: + raise errors.WrongPlugin('No fields defined.') + + if lex.state is not 'END': + while lex.state is not 'END': + _ = lex.NextToken() + if lex.error: + raise errors.WrongPlugin('No filter defined for DynamicFilter.') + + if lex.state != 'END': + raise errors.WrongPlugin( + 'Malformed DynamicFilter, end state not reached.') + + self._fields = lex.fields + self._limit = lex.limit + self._separator = unicode(lex.separator) + + if lex.lex_filter: + super(DynamicFilter, self).CompileFilter(lex.lex_filter) + else: + self.matcher = None + diff --git a/plaso/filters/dynamic_filter_test.py b/plaso/filters/dynamic_filter_test.py new file mode 100644 index 0000000..d4c722e --- /dev/null +++ b/plaso/filters/dynamic_filter_test.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the DynamicFilter filter.""" +import unittest + +from plaso.filters import dynamic_filter +from plaso.filters import test_helper + + +class DynamicFilterTest(test_helper.FilterTestHelper): + """Tests for the DynamicFilter filter.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self.test_filter = dynamic_filter.DynamicFilter() + + def testFilterFail(self): + """Run few tests that should not be a proper filter.""" + self.TestFail('/tmp/file_that_most_likely_does_not_exist') + self.TestFail('some random stuff that is destined to fail') + self.TestFail('some_stuff is "random" and other_stuff ') + self.TestFail('some_stuff is "random" and other_stuff is not "random"') + self.TestFail('SELECT stuff FROM machine WHERE conditions are met') + self.TestFail('SELECT field_a, field_b WHERE ') + self.TestFail('SELECT field_a, field_b SEPARATED BY') + self.TestFail('SELECT field_a, SEPARATED BY field_b WHERE ') + self.TestFail('SELECT field_a, field_b LIMIT WHERE') + + def testFilterApprove(self): + self.TestTrue('SELECT stuff FROM machine WHERE some_stuff is "random"') + self.TestTrue('SELECT field_a, field_b, field_c') + self.TestTrue('SELECT field_a, field_b, field_c SEPARATED BY "%"') + self.TestTrue('SELECT field_a, field_b, field_c LIMIT 10') + self.TestTrue('SELECT field_a, field_b, field_c LIMIT 10 SEPARATED BY "|"') + self.TestTrue('SELECT field_a, field_b, field_c SEPARATED BY "|" LIMIT 10') + self.TestTrue('SELECT field_a, field_b, field_c WHERE date > "2012"') + self.TestTrue( + 'SELECT field_a, field_b, field_c WHERE date > "2012" LIMIT 100') + self.TestTrue(( + 'SELECT field_a, field_b, field_c WHERE date > "2012" SEPARATED BY "@"' + ' LIMIT 100')) + self.TestTrue(( + 'SELECT parser, date, time WHERE some_stuff is "random" and ' + 'date < "2021-02-14 14:51:23"')) + + def testFilterFields(self): + query = 'SELECT stuff FROM machine WHERE some_stuff is "random"' + self.test_filter.CompileFilter(query) + self.assertEquals(['stuff'], self.test_filter.fields) + + query = 'SELECT stuff, a, b, date FROM machine WHERE some_stuff is "random"' + self.test_filter.CompileFilter(query) + self.assertEquals(['stuff', 'a', 'b', 'date'], self.test_filter.fields) + + query = 'SELECT date, message, zone, hostname WHERE some_stuff is "random"' + self.test_filter.CompileFilter(query) + self.assertEquals(['date', 'message', 'zone', 'hostname'], + self.test_filter.fields) + + query = 'SELECT hlutir' + self.test_filter.CompileFilter(query) + self.assertEquals(['hlutir'], self.test_filter.fields) + + query = 'SELECT hlutir LIMIT 10' + self.test_filter.CompileFilter(query) + self.assertEquals(['hlutir'], self.test_filter.fields) + self.assertEquals(10, self.test_filter.limit) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/filters/eventfilter.py b/plaso/filters/eventfilter.py new file mode 100644 index 0000000..6a4e760 --- /dev/null +++ b/plaso/filters/eventfilter.py @@ -0,0 +1,40 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains definition for a simple filter.""" +from plaso.lib import errors +from plaso.lib import filter_interface +from plaso.lib import pfilter + + +class EventObjectFilter(filter_interface.FilterObject): + """A simple filter using the objectfilter library.""" + + def CompileFilter(self, filter_string): + """Compile the filter string into a filter matcher.""" + self.matcher = pfilter.GetMatcher(filter_string, True) + if not self.matcher: + raise errors.WrongPlugin('Malformed filter string.') + + def Match(self, event_object): + """Evaluate an EventObject against a filter.""" + if not self.matcher: + return True + + self._decision = self.matcher.Matches(event_object) + + return self._decision + diff --git a/plaso/filters/eventfilter_test.py b/plaso/filters/eventfilter_test.py new file mode 100644 index 0000000..4740f2f --- /dev/null +++ b/plaso/filters/eventfilter_test.py @@ -0,0 +1,43 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the EventObjectFilter filter.""" +import unittest + +from plaso.filters import test_helper +from plaso.filters import eventfilter + + +class EventObjectFilterTest(test_helper.FilterTestHelper): + """Tests for the EventObjectFilter filter.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self.test_filter = eventfilter.EventObjectFilter() + + def testFilterFail(self): + """Run few tests that should not be a proper filter.""" + self.TestFail('SELECT stuff FROM machine WHERE conditions are met') + self.TestFail('/tmp/file_that_most_likely_does_not_exist') + self.TestFail('some random stuff that is destined to fail') + self.TestFail('some_stuff is "random" and other_stuff ') + + def testFilterApprove(self): + self.TestTrue('some_stuff is "random" and other_stuff is not "random"') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/filters/filterlist.py b/plaso/filters/filterlist.py new file mode 100644 index 0000000..0b933f4 --- /dev/null +++ b/plaso/filters/filterlist.py @@ -0,0 +1,109 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the definition for a list of ObjectFilters."""
+import logging
+import os
+import yaml
+
+from plaso.lib import errors
+from plaso.lib import filter_interface
+from plaso.lib import pfilter
+
+
+def IncludeKeyword(loader, node):
+  """A constructor for the include keyword in YAML."""
+  filename = loader.construct_scalar(node)
+  if os.path.isfile(filename):
+    with open(filename, 'rb') as fh:
+      try:
+        data = yaml.safe_load(fh)
+      except yaml.parser.ParserError as exception:
+        logging.error(u'Unable to load rule file with error: {0:s}'.format(
+            exception))
+        return None
+    return data
+
+
+class ObjectFilterList(filter_interface.FilterObject):
+  """A series of Pfilter filters along with metadata."""
+
+  def CompileFilter(self, filter_string):
+    """Compile a set of ObjectFilters defined in a YAML file."""
+    if not os.path.isfile(filter_string):
+      raise errors.WrongPlugin((
+          'ObjectFilterList requires a YAML file to be passed on; this '
+          'filter string is not a file.'))
+
+    yaml.add_constructor('!include', IncludeKeyword,
+                         Loader=yaml.loader.SafeLoader)
+    results = None
+
+    with open(filter_string, 'rb') as fh:
+      try:
+        results = yaml.safe_load(fh)
+      except (yaml.scanner.ScannerError, IOError) as exception:
+        raise errors.WrongPlugin(
+            u'Unable to parse YAML file with error: {0:s}.'.format(exception))
+
+    self.filters = []
+    if type(results) is dict:
+      self._ParseEntry(results)
+    elif type(results) is list:
+      for result in results:
+        if type(result) is not dict:
+          raise errors.WrongPlugin(
+              u'Wrong format of YAML file, entry not a dict ({})'.format(
+                  type(result)))
+        self._ParseEntry(result)
+    else:
+      raise errors.WrongPlugin(
+          u'Wrong format of YAML file, expected a dict or a list ({})'.format(
+              type(results)))
+
+  def _ParseEntry(self, entry):
+    """Parse a single YAML filter entry."""
+    # A single file with a list of filters to parse.
+    for name, meta in entry.items():
+      if 'filter' not in meta:
+        raise errors.WrongPlugin(
+            u'Entry inside {} does not contain a filter statement.'.format(
+                name))
+
+      matcher = pfilter.GetMatcher(meta.get('filter'), True)
+      if not matcher:
+        raise errors.WrongPlugin(
+            u'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
+                meta.get('filter'), name))
+
+      self.filters.append((name, matcher, meta))
+
+  def Match(self, event_object):
+    """Evaluate an EventObject against a pfilter."""
+    if not self.filters:
+      return True
+
+    for name, matcher, meta in self.filters:
+      self._decision = matcher.Matches(event_object)
+      if self._decision:
+        self._reason = u'[{}] {} {}'.format(
+            name, meta.get('description', 'N/A'), u' - '.join(
+                meta.get('urls', [])))
+        return True
+
+    return False
+
+
diff --git a/plaso/filters/filterlist_test.py b/plaso/filters/filterlist_test.py
new file mode 100644
index 0000000..773d60e
--- /dev/null
+++ b/plaso/filters/filterlist_test.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the PFilters filter.""" + +import os +import logging +import tempfile +import unittest + +from plaso.filters import filterlist +from plaso.filters import test_helper + + +class ObjectFilterTest(test_helper.FilterTestHelper): + """Tests for the ObjectFilterList filter.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self.test_filter = filterlist.ObjectFilterList() + + def testFilterFail(self): + """Run few tests that should not be a proper filter.""" + self.TestFail('SELECT stuff FROM machine WHERE conditions are met') + self.TestFail('/tmp/file_that_most_likely_does_not_exist') + self.TestFail('some random stuff that is destined to fail') + self.TestFail('some_stuff is "random" and other_stuff ') + self.TestFail('some_stuff is "random" and other_stuff is not "random"') + + def CreateFileAndTest(self, content): + """Creates a file and then runs the test.""" + name = '' + with tempfile.NamedTemporaryFile(delete=False) as file_object: + name = file_object.name + file_object.write(content) + + self.TestTrue(name) + + try: + os.remove(name) + except (OSError, IOError) as exception: + logging.warning( + u'Unable to remove temporary file: {0:s} with error: {1:s}'.format( + name, exception)) + + def testFilterApprove(self): + one_rule = u'\n'.join([ + u'Again_Dude:', + u' description: Heavy artillery caught on fire', + u' case_nr: 62345', + u' analysts: [anonymous]', + u' urls: [cnn.com,microsoft.com]', + u' filter: message contains "dude where is my car"']) + + self.CreateFileAndTest(one_rule) + + collection = u'\n'.join([ + u'Rule_Dude:', + u' description: This is the very case I talk about, a lot', + u' case_nr: 1235', + u' analysts: [dude, jack, horn]', + u' urls: [mbl.is,visir.is]', + (u' filter: date > "2012-01-01 10:54:13" and parser not contains ' + u'"evtx"'), + u'', + u'Again_Dude:', + u' description: Heavy artillery caught on fire', + u' case_nr: 62345', + u' analysts: [smith, perry, john]', + u' urls: [cnn.com,microsoft.com]', + u' filter: message contains "dude where is my car"', + u'', + u'Third_Rule_Of_Thumb:', + u' description: Another ticket for another day.', + u' case_nr: 234', + u' analysts: [joe]', + u' urls: [mbl.is,symantec.com/whereevillies,virustotal.com/myhash]', + u' filter: evil_bit is 1']) + + self.CreateFileAndTest(collection) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/filters/test_helper.py b/plaso/filters/test_helper.py new file mode 100644 index 0000000..c68e9cd --- /dev/null +++ b/plaso/filters/test_helper.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains helper functions and classes for filters."""
+import unittest
+
+from plaso.lib import errors
+
+
+class FilterTestHelper(unittest.TestCase):
+  """A simple class that provides helper functions for testing filters."""
+
+  def setUp(self):
+    """This should be overwritten."""
+    self.test_filter = None
+
+  def TestTrue(self, query):
+    """A quick test that should compile into a valid filter."""
+    if not self.test_filter:
+      self.fail(u'No test filter set up.')
+
+    try:
+      self.test_filter.CompileFilter(query)
+    except errors.WrongPlugin:
+      # Let the test fail with a descriptive message.
+      self.fail(u'Unable to compile filter: {0:s}'.format(query))
+
+  def TestFail(self, query):
+    """A quick failure test with a filter."""
+    if not self.test_filter:
+      self.fail(u'No test filter set up.')
+
+    with self.assertRaises(errors.WrongPlugin):
+      self.test_filter.CompileFilter(query)
+
diff --git a/plaso/formatters/__init__.py b/plaso/formatters/__init__.py
new file mode 100644
index 0000000..588e3f1
--- /dev/null
+++ b/plaso/formatters/__init__.py
@@ -0,0 +1,86 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
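Concrete filter tests drive these helpers by assigning test_filter in setUp(),
as the eventfilter and dynamic_filter tests above do. A minimal sketch of a
new test case built on the helper (the class name is hypothetical, the filter
strings are taken from the tests above):

    import unittest

    from plaso.filters import eventfilter
    from plaso.filters import test_helper


    class MyEventFilterTest(test_helper.FilterTestHelper):

      def setUp(self):
        self.test_filter = eventfilter.EventObjectFilter()

      def testCompile(self):
        self.TestTrue('some_stuff is "random"')
        self.TestFail('some random stuff that is destined to fail')


    if __name__ == '__main__':
      unittest.main()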
+"""This file contains an import statement for each formatter.""" + +from plaso.formatters import android_app_usage +from plaso.formatters import android_calls +from plaso.formatters import android_sms +from plaso.formatters import appcompatcache +from plaso.formatters import appusage +from plaso.formatters import asl +from plaso.formatters import bencode_parser +from plaso.formatters import bsm +from plaso.formatters import chrome +from plaso.formatters import chrome_cache +from plaso.formatters import chrome_cookies +from plaso.formatters import chrome_extension_activity +from plaso.formatters import cups_ipp +from plaso.formatters import filestat +from plaso.formatters import firefox +from plaso.formatters import firefox_cache +from plaso.formatters import firefox_cookies +from plaso.formatters import ganalytics +from plaso.formatters import gdrive +from plaso.formatters import hachoir +from plaso.formatters import iis +from plaso.formatters import ipod +from plaso.formatters import java_idx +from plaso.formatters import ls_quarantine +from plaso.formatters import mac_appfirewall +from plaso.formatters import mac_document_versions +from plaso.formatters import mac_keychain +from plaso.formatters import mac_securityd +from plaso.formatters import mac_wifi +from plaso.formatters import mackeeper_cache +from plaso.formatters import mactime +from plaso.formatters import mcafeeav +from plaso.formatters import msie_webcache +from plaso.formatters import msiecf +from plaso.formatters import olecf +from plaso.formatters import opera +from plaso.formatters import oxml +from plaso.formatters import pcap +from plaso.formatters import plist +from plaso.formatters import popcontest +from plaso.formatters import pls_recall +from plaso.formatters import recycler +from plaso.formatters import rubanetra +from plaso.formatters import safari +from plaso.formatters import selinux +from plaso.formatters import shell_items +from plaso.formatters import skydrivelog +from plaso.formatters import skydrivelogerr +from plaso.formatters import skype +from plaso.formatters import symantec +from plaso.formatters import syslog +from plaso.formatters import task_scheduler +from plaso.formatters import text +from plaso.formatters import utmp +from plaso.formatters import utmpx +from plaso.formatters import windows +from plaso.formatters import winevt +from plaso.formatters import winevtx +from plaso.formatters import winfirewall +from plaso.formatters import winjob +from plaso.formatters import winlnk +from plaso.formatters import winprefetch +from plaso.formatters import winreg +from plaso.formatters import winregservice +from plaso.formatters import xchatlog +from plaso.formatters import xchatscrollback +from plaso.formatters import zeitgeist diff --git a/plaso/formatters/android_app_usage.py b/plaso/formatters/android_app_usage.py new file mode 100644 index 0000000..14fd6a1 --- /dev/null +++ b/plaso/formatters/android_app_usage.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Android Application Usage.""" + +from plaso.formatters import interface + + +class AndroidApplicationFormatter(interface.ConditionalEventFormatter): + """Formatter for an Application Last Resumed event.""" + + DATA_TYPE = 'android:event:last_resume_time' + + FORMAT_STRING_PIECES = [ + u'Package: {package}', + u'Component: {component}'] + + SOURCE_LONG = 'Android App Usage' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/android_calls.py b/plaso/formatters/android_calls.py new file mode 100644 index 0000000..fefdbf0 --- /dev/null +++ b/plaso/formatters/android_calls.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Android contacts2.db database events.""" + +from plaso.formatters import interface + + +class AndroidCallFormatter(interface.ConditionalEventFormatter): + """Formatter for Android call history events.""" + + DATA_TYPE = 'android:event:call' + + FORMAT_STRING_PIECES = [ + u'{call_type}', + u'Number: {number}', + u'Name: {name}', + u'Duration: {duration} seconds'] + + FORMAT_STRING_SHORT_PIECES = [u'{call_type} Call'] + + SOURCE_LONG = 'Android Call History' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/android_sms.py b/plaso/formatters/android_sms.py new file mode 100644 index 0000000..729bd5d --- /dev/null +++ b/plaso/formatters/android_sms.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
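Formatters such as AndroidApplicationFormatter above only declare
FORMAT_STRING_PIECES; the ConditionalEventFormatter base class in
plaso.formatters.interface assembles the message from the pieces whose
attributes are actually present on the event. The stand-alone sketch below
illustrates the idea (it is not the actual interface code), under the
assumption that each piece carries exactly one placeholder, which is how the
formatters in this import use it:

    import re

    def BuildMessage(format_string_pieces, event_attributes):
      """Joins only the pieces whose single {placeholder} has a value."""
      pieces = []
      for piece in format_string_pieces:
        name = re.search(r'\{(\w+)\}', piece).group(1)
        if name in event_attributes:
          pieces.append(piece.replace(
              u'{' + name + u'}', unicode(event_attributes[name])))
      return u' '.join(pieces)

    # With only 'package' set, the 'Component:' piece is dropped:
    print BuildMessage(
        [u'Package: {package}', u'Component: {component}'],
        {'package': 'com.example.app'})
    # Package: com.example.app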
+"""Formatter for Android mmssms.db database events.""" + +from plaso.formatters import interface + + +class AndroidSmsFormatter(interface.ConditionalEventFormatter): + """Formatter for Android sms events.""" + + DATA_TYPE = 'android:messaging:sms' + + FORMAT_STRING_PIECES = [ + u'Type: {sms_type}', + u'Address: {address}', + u'Status: {sms_read}', + u'Message: {body}'] + + FORMAT_STRING_SHORT_PIECES = [u'{body}'] + + SOURCE_LONG = 'Android SMS messages' + SOURCE_SHORT = 'SMS' diff --git a/plaso/formatters/appcompatcache.py b/plaso/formatters/appcompatcache.py new file mode 100644 index 0000000..9bcde57 --- /dev/null +++ b/plaso/formatters/appcompatcache.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the AppCompatCache entries inside the Windows Registry.""" + +from plaso.formatters import interface + + +class AppCompatCacheFormatter(interface.ConditionalEventFormatter): + """Formatter for an AppCompatCache Windows Registry entry.""" + + DATA_TYPE = 'windows:registry:appcompatcache' + + FORMAT_STRING_PIECES = [ + u'[{keyname}]', + u'Cached entry: {entry_index}', + u'Path: {path}'] + + FORMAT_STRING_SHORT_PIECES = [u'Path: {path}'] + + SOURCE_LONG = 'AppCompatCache Registry Entry' + SOURCE_SHORT = 'REG' diff --git a/plaso/formatters/appusage.py b/plaso/formatters/appusage.py new file mode 100644 index 0000000..a36ee73 --- /dev/null +++ b/plaso/formatters/appusage.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a formatter for the Mac OS X application usage.""" + +from plaso.formatters import interface + + +class ApplicationUsageFormatter(interface.EventFormatter): + """Define the formatting for Application Usage information.""" + + DATA_TYPE = 'macosx:application_usage' + + FORMAT_STRING = (u'{application} v.{app_version} (bundle: {bundle_id}).' 
+ ' Launched: {count} time(s)') + FORMAT_STRING_SHORT = u'{application} ({count} time(s))' + + SOURCE_LONG = 'Application Usage' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/asl.py b/plaso/formatters/asl.py new file mode 100644 index 0000000..4840c22 --- /dev/null +++ b/plaso/formatters/asl.py @@ -0,0 +1,47 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the Apple System Log binary files.""" + +from plaso.formatters import interface + + +class AslFormatter(interface.ConditionalEventFormatter): + """Formatter for an ASL log entry.""" + + DATA_TYPE = 'mac:asl:event' + + FORMAT_STRING_PIECES = [ + u'MessageID: {message_id}', + u'Level: {level}', + u'User ID: {user_sid}', + u'Group ID: {group_id}', + u'Read User: {read_uid}', + u'Read Group: {read_gid}', + u'Host: {computer_name}', + u'Sender: {sender}', + u'Facility: {facility}', + u'Message: {message}', + u'{extra_information}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Host: {host}', + u'Sender: {sender}', + u'Facility: {facility}'] + + SOURCE_LONG = 'ASL entry' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/bencode_parser.py b/plaso/formatters/bencode_parser.py new file mode 100644 index 0000000..bdf2625 --- /dev/null +++ b/plaso/formatters/bencode_parser.py @@ -0,0 +1,49 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
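ApplicationUsageFormatter above uses the plain EventFormatter base rather
than the conditional one: FORMAT_STRING is a single template, so every
placeholder is expected to be present on the event. Roughly (the attribute
values are hypothetical):

    attributes = {
        'application': u'Safari', 'app_version': u'6.0.5',
        'bundle_id': u'com.apple.Safari', 'count': 2}
    print (u'{application} v.{app_version} (bundle: {bundle_id}).'
           u' Launched: {count} time(s)').format(**attributes)
    # Safari v.6.0.5 (bundle: com.apple.Safari). Launched: 2 time(s)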
+"""Formatter for bencode parser events.""" + +from plaso.formatters import interface + + +class uTorrentFormatter(interface.ConditionalEventFormatter): + """Formatter for a BitTorrent uTorrent active torrents.""" + + DATA_TYPE = 'p2p:bittorrent:utorrent' + + SOURCE_LONG = 'uTorrent Active Torrents' + SOURCE_SHORT = 'TORRENT' + + FORMAT_STRING_SEPARATOR = u'; ' + + FORMAT_STRING_PIECES = [u'Torrent {caption}', + u'Saved to {path}', + u'Minutes seeded: {seedtime}'] + + +class TransmissionFormatter(interface.ConditionalEventFormatter): + """Formatter for a Transmission active torrents.""" + + DATA_TYPE = 'p2p:bittorrent:transmission' + + SOURCE_LONG = 'Transmission Active Torrents' + SOURCE_SHORT = 'TORRENT' + + FORMAT_STRING_SEPARATOR = u'; ' + + FORMAT_STRING_PIECES = [u'Saved to {destination}', + u'Minutes seeded: {seedtime}'] diff --git a/plaso/formatters/bsm.py b/plaso/formatters/bsm.py new file mode 100644 index 0000000..ba82667 --- /dev/null +++ b/plaso/formatters/bsm.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Basic Security Module binary files.""" + +from plaso.formatters import interface + + +class MacBSMFormatter(interface.ConditionalEventFormatter): + """Formatter for an BSM log entry.""" + + DATA_TYPE = 'mac:bsm:event' + + FORMAT_STRING_PIECES = [ + u'Type: {event_type}', + u'Return: {return_value}', + u'Information: {extra_tokens}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Type: {event_type}', + u'Return: {return_value}'] + + SOURCE_LONG = 'BSM entry' + SOURCE_SHORT = 'LOG' + + +class BSMFormatter(interface.ConditionalEventFormatter): + """Formatter for an BSM log entry.""" + + DATA_TYPE = 'bsm:event' + + FORMAT_STRING_PIECES = [ + u'Type: {event_type}', + u'Information: {extra_tokens}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Type: {event_type}'] + + SOURCE_LONG = 'BSM entry' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/chrome.py b/plaso/formatters/chrome.py new file mode 100644 index 0000000..3a9ef70 --- /dev/null +++ b/plaso/formatters/chrome.py @@ -0,0 +1,61 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains a formatter for the Google Chrome history.""" + +from plaso.formatters import interface + + +class ChromePageVisitedFormatter(interface.ConditionalEventFormatter): + """The event formatter for page visited data in Chrome History.""" + + DATA_TYPE = 'chrome:history:page_visited' + + FORMAT_STRING_PIECES = [ + u'{url}', + u'({title})', + u'[count: {typed_count}]', + u'Host: {host}', + u'Visit from: {from_visit}', + u'Visit Source: [{visit_source}]', + u'{extra}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{url}', + u'({title})'] + + SOURCE_LONG = 'Chrome History' + SOURCE_SHORT = 'WEBHIST' + + +class ChromeFileDownloadFormatter(interface.ConditionalEventFormatter): + """The event formatter for file downloaded data in Chrome History.""" + + DATA_TYPE = 'chrome:history:file_downloaded' + + FORMAT_STRING_PIECES = [ + u'{url}', + u'({full_path}).', + u'Received: {received_bytes} bytes', + u'out of: {total_bytes} bytes.'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{full_path} downloaded', + u'({received_bytes} bytes)'] + + SOURCE_LONG = 'Chrome History' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/chrome_cache.py b/plaso/formatters/chrome_cache.py new file mode 100644 index 0000000..32669b2 --- /dev/null +++ b/plaso/formatters/chrome_cache.py @@ -0,0 +1,32 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Chrome Cache files based-events.""" + +from plaso.formatters import interface + + +class ChromeCacheEntryEventFormatter(interface.ConditionalEventFormatter): + """Class contains the Chrome Cache Entry event formatter.""" + + DATA_TYPE = 'chrome:cache:entry' + + FORMAT_STRING_PIECES = [ + u'Original URL: {original_url}'] + + SOURCE_LONG = 'Chrome Cache' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/chrome_cookies.py b/plaso/formatters/chrome_cookies.py new file mode 100644 index 0000000..c9ac405 --- /dev/null +++ b/plaso/formatters/chrome_cookies.py @@ -0,0 +1,40 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains a formatter for the Google Chrome cookie.""" + +from plaso.formatters import interface + + +class ChromeCookieFormatter(interface.ConditionalEventFormatter): + """The event formatter for cookie data in Chrome Cookies database.""" + + DATA_TYPE = 'chrome:cookie:entry' + + FORMAT_STRING_PIECES = [ + u'{url}', + u'({cookie_name})', + u'Flags:', + u'[HTTP only] = {httponly}', + u'[Persistent] = {persistent}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{host}', + u'({cookie_name})'] + + SOURCE_LONG = 'Chrome Cookies' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/chrome_extension_activity.py b/plaso/formatters/chrome_extension_activity.py new file mode 100644 index 0000000..6a2dab0 --- /dev/null +++ b/plaso/formatters/chrome_extension_activity.py @@ -0,0 +1,47 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the Google extension activity database events.""" + +from plaso.formatters import interface + + +class ChromeExtensionActivityEventFormatter( + interface.ConditionalEventFormatter): + """The event formatter for Chrome extension activity log entries.""" + + DATA_TYPE = 'chrome:extension_activity:activity_log' + + FORMAT_STRING_PIECES = [ + u'Chrome extension: {extension_id}', + u'Action type: {action_type}', + u'Activity identifier: {activity_id}', + u'Page URL: {page_url}', + u'Page title: {page_title}', + u'API name: {api_name}', + u'Args: {args}', + u'Other: {other}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{extension_id}', + u'{api_name}', + u'{args}'] + + SOURCE_LONG = 'Chrome Extension Activity' + SOURCE_SHORT = 'WEBHIST' + + # TODO: add action_type string representation. diff --git a/plaso/formatters/cups_ipp.py b/plaso/formatters/cups_ipp.py new file mode 100644 index 0000000..707f143 --- /dev/null +++ b/plaso/formatters/cups_ipp.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for CUPS IPP file.""" + +from plaso.formatters import interface + + +class CupsIppFormatter(interface.ConditionalEventFormatter): + """Formatter for CUPS IPP file.""" + + DATA_TYPE = 'cups:ipp:event' + + FORMAT_STRING_PIECES = [ + u'Status: {status}', + u'User: {user}', + u'Owner: {owner}', + u'Job Name: {job_name}', + u'Application: {application}', + u'Document type: {type_doc}', + u'Printer: {printer_id}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Status: {status}', + u'Job Name: {job_name}'] + + SOURCE_LONG = 'CUPS IPP Log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/filestat.py b/plaso/formatters/filestat.py new file mode 100644 index 0000000..c95e2fa --- /dev/null +++ b/plaso/formatters/filestat.py @@ -0,0 +1,66 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a formatter for the Stat object of a PFile.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class PfileStatFormatter(interface.ConditionalEventFormatter): + """Define the formatting for PFileStat.""" + + DATA_TYPE = 'fs:stat' + + FORMAT_STRING_PIECES = [u'{display_name}', + u'({unallocated})'] + + FORMAT_STRING_SHORT_PIECES = [u'{filename}'] + + SOURCE_SHORT = 'FILE' + + def GetSources(self, event_object): + """Return a list of source short and long messages.""" + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter('Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + self.source_string = u'{0:s} {1:s}'.format( + getattr(event_object, 'fs_type', u'Unknown FS'), + getattr(event_object, 'timestamp_desc', u'Time')) + + return super(PfileStatFormatter, self).GetSources(event_object) + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + if not getattr(event_object, 'allocated', True): + event_object.unallocated = u'unallocated' + + return super(PfileStatFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/firefox.py b/plaso/formatters/firefox.py new file mode 100644 index 0000000..0809aeb --- /dev/null +++ b/plaso/formatters/firefox.py @@ -0,0 +1,136 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a formatter for the Mozilla Firefox history."""
+
+from plaso.lib import errors
+from plaso.formatters import interface
+
+
+class FirefoxBookmarkAnnotationFormatter(interface.ConditionalEventFormatter):
+  """Formatter for a Firefox places.sqlite bookmark annotation."""
+
+  DATA_TYPE = 'firefox:places:bookmark_annotation'
+
+  FORMAT_STRING_PIECES = [
+      u'Bookmark Annotation: [{content}]',
+      u'to bookmark [{title}]',
+      u'({url})']
+
+  FORMAT_STRING_SHORT_PIECES = [u'Bookmark Annotation: {title}']
+
+  SOURCE_LONG = 'Firefox History'
+  SOURCE_SHORT = 'WEBHIST'
+
+
+class FirefoxBookmarkFolderFormatter(interface.EventFormatter):
+  """Formatter for a Firefox places.sqlite bookmark folder."""
+
+  DATA_TYPE = 'firefox:places:bookmark_folder'
+
+  FORMAT_STRING = u'{title}'
+
+  SOURCE_LONG = 'Firefox History'
+  SOURCE_SHORT = 'WEBHIST'
+
+
+class FirefoxBookmarkFormatter(interface.ConditionalEventFormatter):
+  """Formatter for a Firefox places.sqlite URL bookmark."""
+
+  DATA_TYPE = 'firefox:places:bookmark'
+
+  FORMAT_STRING_PIECES = [
+      u'Bookmark {type}',
+      u'{title}',
+      u'({url})',
+      u'[{places_title}]',
+      u'visit count {visit_count}']
+
+  FORMAT_STRING_SHORT_PIECES = [
+      u'Bookmarked {title}',
+      u'({url})']
+
+  SOURCE_LONG = 'Firefox History'
+  SOURCE_SHORT = 'WEBHIST'
+
+
+class FirefoxPageVisitFormatter(interface.ConditionalEventFormatter):
+  """Formatter for a Firefox places.sqlite page visited."""
+
+  DATA_TYPE = 'firefox:places:page_visited'
+
+  # Transitions defined in the source file:
+  #   src/toolkit/components/places/nsINavHistoryService.idl
+  # Also contains further explanation of what each of these settings means.
+  _URL_TRANSITIONS = {
+      1: 'LINK',
+      2: 'TYPED',
+      3: 'BOOKMARK',
+      4: 'EMBED',
+      5: 'REDIRECT_PERMANENT',
+      6: 'REDIRECT_TEMPORARY',
+      7: 'DOWNLOAD',
+      8: 'FRAMED_LINK',
+  }
+
+  # TODO: Make extra conditional formatting.
+  FORMAT_STRING_PIECES = [
+      u'{url}',
+      u'({title})',
+      u'[count: {visit_count}]',
+      u'Host: {host}',
+      u'{extra_string}']
+
+  FORMAT_STRING_SHORT_PIECES = [u'URL: {url}']
+
+  SOURCE_LONG = 'Firefox History'
+  SOURCE_SHORT = 'WEBHIST'
+
+  def GetMessages(self, event_object):
+    """Return the message strings."""
+    if self.DATA_TYPE != event_object.data_type:
+      raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
+          event_object.data_type))
+
+    transition = self._URL_TRANSITIONS.get(
+        getattr(event_object, 'visit_type', 0), None)
+
+    if transition:
+      transition_str = u'Transition: {0!s}'.format(transition)
+
+    if hasattr(event_object, 'extra'):
+      if transition:
+        event_object.extra.append(transition_str)
+      event_object.extra_string = u' '.join(event_object.extra)
+    elif transition:
+      event_object.extra_string = transition_str
+
+    return super(FirefoxPageVisitFormatter, self).GetMessages(event_object)
+
+
+class FirefoxDownloadFormatter(interface.EventFormatter):
+  """Formatter for a Firefox downloads.sqlite download."""
+
+  DATA_TYPE = 'firefox:downloads:download'
+
+  FORMAT_STRING = (u'{url} ({full_path}). 
Received: {received_bytes} bytes ' + u'out of: {total_bytes} bytes.') + FORMAT_STRING_SHORT = u'{full_path} downloaded ({received_bytes} bytes)' + + SOURCE_LONG = 'Firefox History' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/firefox_cache.py b/plaso/formatters/firefox_cache.py new file mode 100644 index 0000000..468c294 --- /dev/null +++ b/plaso/formatters/firefox_cache.py @@ -0,0 +1,39 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Firefox cache records.""" + +from plaso.formatters import interface + +class FirefoxCacheFormatter(interface.ConditionalEventFormatter): + """Formatter for Firefox cache record.""" + + DATA_TYPE = 'firefox:cache:record' + + FORMAT_STRING_PIECES = [ + u'Fetched {fetch_count} time(s)', + u'[{response_code}]', + u'{request_method}', + u'"{url}"'] + + FORMAT_STRING_SHORT_PIECES = [ + u'[{response_code}]', + u'{request_method}', + u'"{url}"'] + + SOURCE_LONG = 'Firefox Cache' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/firefox_cookies.py b/plaso/formatters/firefox_cookies.py new file mode 100644 index 0000000..1034ff1 --- /dev/null +++ b/plaso/formatters/firefox_cookies.py @@ -0,0 +1,40 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a formatter for the Firefox cookie.""" + +from plaso.formatters import interface + + +class FirefoxCookieFormatter(interface.ConditionalEventFormatter): + """The event formatter for cookie data in Firefox Cookies database.""" + + DATA_TYPE = 'firefox:cookie:entry' + + FORMAT_STRING_PIECES = [ + u'{url}', + u'({cookie_name})', + u'Flags:', + u'[HTTP only]: {httponly}', + u'(GA analysis: {ga_data})'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{host}', + u'({cookie_name})'] + + SOURCE_LONG = 'Firefox Cookies' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/ganalytics.py b/plaso/formatters/ganalytics.py new file mode 100644 index 0000000..8339a30 --- /dev/null +++ b/plaso/formatters/ganalytics.py @@ -0,0 +1,70 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a formatter for the Google Analytics cookie."""
+
+from plaso.formatters import interface
+
+
+class AnalyticsUtmaCookieFormatter(interface.ConditionalEventFormatter):
+  """The event formatter for UTMA Google Analytics cookie."""
+
+  DATA_TYPE = 'cookie:google:analytics:utma'
+
+  FORMAT_STRING_PIECES = [
+      u'{url}',
+      u'({cookie_name})',
+      u'Sessions: {sessions}',
+      u'Domain Hash: {domain_hash}',
+      u'Visitor ID: {visitor_id}']
+
+  FORMAT_STRING_SHORT_PIECES = [
+      u'{url}',
+      u'({cookie_name})']
+
+  SOURCE_LONG = 'Google Analytics Cookies'
+  SOURCE_SHORT = 'WEBHIST'
+
+
+class AnalyticsUtmbCookieFormatter(AnalyticsUtmaCookieFormatter):
+  """The event formatter for UTMB Google Analytics cookie."""
+
+  DATA_TYPE = 'cookie:google:analytics:utmb'
+
+  FORMAT_STRING_PIECES = [
+      u'{url}',
+      u'({cookie_name})',
+      u'Pages Viewed: {pages_viewed}',
+      u'Domain Hash: {domain_hash}']
+
+
+class AnalyticsUtmzCookieFormatter(AnalyticsUtmaCookieFormatter):
+  """The event formatter for UTMZ Google Analytics cookie."""
+
+  DATA_TYPE = 'cookie:google:analytics:utmz'
+
+  FORMAT_STRING_PIECES = [
+      u'{url}',
+      u'({cookie_name})',
+      u'Sessions: {sessions}',
+      u'Domain Hash: {domain_hash}',
+      u'Sources: {sources}',
+      u'Last source used to access: {utmcsr}',
+      u'Ad campaign information: {utmccn}',
+      u'Last type of visit: {utmcmd}',
+      u'Keywords used to find site: {utmctr}',
+      u'Path to the page of referring link: {utmcct}']
diff --git a/plaso/formatters/gdrive.py b/plaso/formatters/gdrive.py
new file mode 100644
index 0000000..ad6d99b
--- /dev/null
+++ b/plaso/formatters/gdrive.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
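The two subclasses above, AnalyticsUtmbCookieFormatter and AnalyticsUtmzCookieFormatter, override only DATA_TYPE and FORMAT_STRING_PIECES; the short format pieces and the source strings are inherited from AnalyticsUtmaCookieFormatter. A minimal sketch of that reuse, assuming the plaso package from this commit is on the Python path:

    from plaso.formatters import ganalytics

    formatter = ganalytics.AnalyticsUtmzCookieFormatter()

    # Overridden by the utmz subclass.
    print formatter.DATA_TYPE                   # cookie:google:analytics:utmz
    # Inherited from AnalyticsUtmaCookieFormatter.
    print formatter.SOURCE_LONG                 # Google Analytics Cookies
    print formatter.FORMAT_STRING_SHORT_PIECES  # [u'{url}', u'({cookie_name})']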
+"""This file contains a formatter for the Google Drive snaphots.""" + +from plaso.formatters import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class GDriveCloudEntryFormatter(interface.ConditionalEventFormatter): + """Formatter for Google Drive snapshot cloud entry.""" + + DATA_TYPE = 'gdrive:snapshot:cloud_entry' + + FORMAT_STRING_PIECES = [ + u'File Path: {path}', + u'[{shared}]', + u'Size: {size}', + u'URL: {url}', + u'Type: {document_type}'] + FORMAT_STRING_SHORT_PIECES = [u'{path}'] + + SOURCE_LONG = 'Google Drive (cloud entry)' + SOURCE_SHORT = 'LOG' + + +class GDriveLocalEntryFormatter(interface.ConditionalEventFormatter): + """Formatter for Google Drive snapshot local entry.""" + + DATA_TYPE = 'gdrive:snapshot:local_entry' + + FORMAT_STRING_PIECES = [ + u'File Path: {path}', + u'Size: {size}'] + + FORMAT_STRING_SHORT_PIECES = [u'{path}'] + + SOURCE_LONG = 'Google Drive (local entry)' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/hachoir.py b/plaso/formatters/hachoir.py new file mode 100644 index 0000000..d120089 --- /dev/null +++ b/plaso/formatters/hachoir.py @@ -0,0 +1,57 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Hachoir events.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class HachoirFormatter(interface.EventFormatter): + """Formatter for Hachoir based events.""" + + DATA_TYPE = 'metadata:hachoir' + FORMAT_STRING = u'{data}' + + SOURCE_LONG = 'Hachoir Metadata' + SOURCE_SHORT = 'META' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + string_parts = [] + for key, value in sorted(event_object.metadata.items()): + string_parts.append(u'{0:s}: {1:s}'.format(key, value)) + + event_object.data = u' '.join(string_parts) + + return super(HachoirFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/iis.py b/plaso/formatters/iis.py new file mode 100644 index 0000000..2f9228d --- /dev/null +++ b/plaso/formatters/iis.py @@ -0,0 +1,59 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Formatter for Windows IIS log files."""
+
+from plaso.formatters import interface
+
+
+__author__ = 'Ashley Holtz (ashley.a.holtz@gmail.com)'
+
+
+class WinIISFormatter(interface.ConditionalEventFormatter):
+  """A formatter for Windows IIS log entries."""
+
+  DATA_TYPE = 'iis:log:line'
+
+  FORMAT_STRING_PIECES = [
+      u'{http_method}',
+      u'{requested_uri_stem}',
+      u'[',
+      u'{source_ip}',
+      u'>',
+      u'{dest_ip}',
+      u':',
+      u'{dest_port}',
+      u']',
+      u'Http Status: {http_status}',
+      u'Bytes Sent: {sent_bytes}',
+      u'Bytes Received: {received_bytes}',
+      u'User Agent: {user_agent}',
+      u'Protocol Version: {protocol_version}',]
+
+  FORMAT_STRING_SHORT_PIECES = [
+      u'{http_method}',
+      u'{requested_uri_stem}',
+      u'[',
+      u'{source_ip}',
+      u'>',
+      u'{dest_ip}',
+      u':',
+      u'{dest_port}',
+      u']',]
+
+  SOURCE_LONG = 'IIS Log'
+  SOURCE_SHORT = 'LOG'
diff --git a/plaso/formatters/interface.py b/plaso/formatters/interface.py
new file mode 100644
index 0000000..9c0b0d8
--- /dev/null
+++ b/plaso/formatters/interface.py
@@ -0,0 +1,244 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the event formatters interface classes."""
+
+import re
+
+from plaso.lib import errors
+from plaso.lib import registry
+
+
+class EventFormatter(object):
+  """Base class to format event type specific data using a format string.
+
+  Define the (long) format string and the short format string by defining
+  FORMAT_STRING and FORMAT_STRING_SHORT. The syntax of the format strings
+  is similar to that of format() where the placeholder for a certain
+  event object attribute is defined as {attribute_name}.
+  """
+  __metaclass__ = registry.MetaclassRegistry
+  __abstract = True
+
+  # The data type is a unique identifier for the event data. The current
+  # approach is to define it as a human readable string in the format
+  # root:branch: ... :leaf, e.g. a page visited entry inside a Chrome History
+  # database is defined as: chrome:history:page_visited.
+  DATA_TYPE = u'internal'
+
+  # The format string.
+  FORMAT_STRING = u''
+  FORMAT_STRING_SHORT = u''
+
+  # The source short and long strings.
+  SOURCE_SHORT = u'LOG'
+  SOURCE_LONG = u''
+
+  def __init__(self):
+    """Initializes the event formatter."""
+    # Forcing the format string to be unicode to make sure we don't
+    # try to format it as an ASCII string.
+    self.format_string = unicode(self.FORMAT_STRING)
+    self.format_string_short = unicode(self.FORMAT_STRING_SHORT)
+    self.source_string = unicode(self.SOURCE_LONG)
+    self.source_string_short = unicode(self.SOURCE_SHORT)
+
+  def GetMessages(self, event_object):
+    """Return a list of messages extracted from an event object.
+
+    The l2t_csv and other formats are dependent on a message field,
+    referred to as description_long and description_short in l2t_csv.
+
+    Plaso does not store this field explicitly; the event only contains a
+    format string and the appropriate attributes.
+
+    This method takes the format string and converts that back into a
+    formatted string that can be used for display.
+
+    Args:
+      event_object: The event object (EventObject) containing the event
+                    specific data.
+
+    Returns:
+      A list that contains both the longer and shorter version of the message
+      string.
+
+    Raises:
+      WrongFormatter: if the event object cannot be formatted by the formatter.
+    """
+    if self.DATA_TYPE != event_object.data_type:
+      raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
+          event_object.data_type))
+
+    event_values = event_object.GetValues()
+
+    try:
+      msg = self.format_string.format(**event_values)
+    except KeyError as exception:
+      msgs = []
+      msgs.append(u'Format error: [{0:s}] for: <{1:s}>'.format(
+          exception, self.format_string))
+      for attr, value in event_object.GetValues().iteritems():
+        msgs.append(u'{0}: {1}'.format(attr, value))
+
+      msg = u' '.join(msgs)
+
+    # Strip carriage return and linefeed from the message strings.
+    # Using replace function here because it is faster
+    # than re.sub() or string.strip().
+    msg = msg.replace('\r', u'').replace('\n', u'')
+
+    if not self.format_string_short:
+      msg_short = msg
+    else:
+      try:
+        msg_short = self.format_string_short.format(**event_values)
+        # Using replace function here because it is faster
+        # than re.sub() or string.strip().
+        msg_short = msg_short.replace('\r', u'').replace('\n', u'')
+      except KeyError:
+        msg_short = u'Unable to format short message string: {0:s}'.format(
+            self.format_string_short)
+
+    # Truncate the short message string if necessary.
+    if len(msg_short) > 80:
+      msg_short = u'{0:s}...'.format(msg_short[0:77])
+
+    return msg, msg_short
+
+  def GetSources(self, event_object):
+    """Return the source short and long strings."""
+    if self.DATA_TYPE != event_object.data_type:
+      raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
+          event_object.data_type))
+
+    return self.source_string_short, self.source_string
+
+
+class ConditionalEventFormatter(EventFormatter):
+  """Base class to conditionally format event data using format string pieces.
+
+  Define the (long) format string and the short format string by defining
+  FORMAT_STRING_PIECES and FORMAT_STRING_SHORT_PIECES. The syntax of the
+  format string pieces is similar to that of the event formatter
+  (EventFormatter). Every format string piece should contain a single
+  attribute name or none.
+
+  FORMAT_STRING_SEPARATOR controls the string with which the separate
+  string pieces are joined. It contains a space by default.
+  """
+  __abstract = True
+
+  # The format string pieces.
+  FORMAT_STRING_PIECES = [u'']
+  FORMAT_STRING_SHORT_PIECES = [u'']
+
+  # The separator used to join the string pieces.
+  FORMAT_STRING_SEPARATOR = u' '
+
+  def __init__(self):
+    """Initializes the conditional formatter.
+
+    A map is built of the string pieces and their corresponding attribute
+    names to optimize conditional string formatting.
+
+    Raises:
+      RuntimeError: when an invalid format string piece is encountered.
+    """
+    super(ConditionalEventFormatter, self).__init__()
+
+    # The format string can be defined as:
+    # {name}, {name:format}, {name!conversion}, {name!conversion:format}
+    regexp = re.compile('{[a-z][a-zA-Z0-9_]*[!]?[^:}]*[:]?[^}]*}')
+    regexp_name = re.compile('[a-z][a-zA-Z0-9_]*')
+
+    # The format string pieces map is a list containing the attribute name
+    # per format string piece. E.g. ["Description: {description}"] would be
+    # mapped to: [0] = "description". If the string piece does not contain
+    # an attribute name it is treated as text that does not need formatting.
+    self._format_string_pieces_map = []
+    for format_string_piece in self.FORMAT_STRING_PIECES:
+      result = regexp.findall(format_string_piece)
+      if not result:
+        # The text format string piece is stored as an empty map entry to
+        # keep the index in the map equal to the format string pieces.
+        self._format_string_pieces_map.append('')
+      elif len(result) == 1:
+        # Extract the attribute name.
+        attribute_name = regexp_name.findall(result[0])[0]
+        self._format_string_pieces_map.append(attribute_name)
+      else:
+        raise RuntimeError((
+            u'Invalid format string piece: [{0:s}] contains more than 1 '
+            u'attribute name.').format(format_string_piece))
+
+    self._format_string_short_pieces_map = []
+    for format_string_piece in self.FORMAT_STRING_SHORT_PIECES:
+      result = regexp.findall(format_string_piece)
+      if not result:
+        # The text format string piece is stored as an empty map entry to
+        # keep the index in the map equal to the format string pieces.
+        self._format_string_short_pieces_map.append('')
+      elif len(result) == 1:
+        # Extract the attribute name.
+        attribute_name = regexp_name.findall(result[0])[0]
+        self._format_string_short_pieces_map.append(attribute_name)
+      else:
+        raise RuntimeError((
+            u'Invalid short format string piece: [{0:s}] contains more '
+            u'than 1 attribute name.').format(format_string_piece))
+
+  def GetMessages(self, event_object):
+    """Returns a list of messages extracted from an event object.
+
+    Args:
+      event_object: The event object (EventObject) containing the event
+                    specific data.
+
+    Returns:
+      A list that contains both the longer and shorter version of the message
+      string.
+    """
+    if self.DATA_TYPE != event_object.data_type:
+      raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
+          event_object.data_type))
+
+    # Using getattr here to make sure the attribute is not set to None.
+    # If A.b is None, hasattr(A, b) is True but getattr(A, b, None) is falsy.
+    string_pieces = []
+    for map_index, attribute_name in enumerate(self._format_string_pieces_map):
+      if not attribute_name or hasattr(event_object, attribute_name):
+        if attribute_name:
+          attribute = getattr(event_object, attribute_name, None)
+          # If an attribute is an int, yet has a zero value, we want to
+          # include it in the format string, since that is still potentially
+          # valid information. Otherwise we would like to skip it.
+ if type(attribute) not in (bool, int, long, float) and not attribute: + continue + string_pieces.append(self.FORMAT_STRING_PIECES[map_index]) + self.format_string = unicode( + self.FORMAT_STRING_SEPARATOR.join(string_pieces)) + + string_pieces = [] + for map_index, attribute_name in enumerate( + self._format_string_short_pieces_map): + if not attribute_name or getattr(event_object, attribute_name, None): + string_pieces.append(self.FORMAT_STRING_SHORT_PIECES[map_index]) + self.format_string_short = unicode( + self.FORMAT_STRING_SEPARATOR.join(string_pieces)) + + return super(ConditionalEventFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/ipod.py b/plaso/formatters/ipod.py new file mode 100644 index 0000000..ac9a663 --- /dev/null +++ b/plaso/formatters/ipod.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the iPod device events.""" + +from plaso.formatters import interface + + +class IPodDeviceFormatter(interface.ConditionalEventFormatter): + """Formatter for iPod device events.""" + + DATA_TYPE = 'ipod:device:entry' + + FORMAT_STRING_PIECES = [ + u'Device ID: {device_id}', + u'Type: {device_class}', + u'[{family_id}]', + u'Connected {use_count} times', + u'Serial nr: {serial_number}', + u'IMEI [{imei}]'] + + SOURCE_LONG = 'iPod Connections' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/java_idx.py b/plaso/formatters/java_idx.py new file mode 100644 index 0000000..55cc94f --- /dev/null +++ b/plaso/formatters/java_idx.py @@ -0,0 +1,34 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
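ConditionalEventFormatter, defined in interface.py above, keeps one attribute name per format string piece and silently drops any piece whose attribute is absent from the event, joining the survivors with FORMAT_STRING_SEPARATOR before regular format() substitution. A standalone sketch of that selection logic (not plaso code; the pieces are copied from the IPodDeviceFormatter above and the event values are hypothetical):

    # -*- coding: utf-8 -*-
    import re

    _NAME_RE = re.compile('{([a-z][a-zA-Z0-9_]*)')

    def conditional_format(pieces, event_values, separator=u' '):
      # Keep plain-text pieces and pieces whose single attribute is present;
      # the real class also special-cases numeric attributes that are zero.
      kept = []
      for piece in pieces:
        names = _NAME_RE.findall(piece)
        if not names or event_values.get(names[0]) is not None:
          kept.append(piece)
      return separator.join(kept).format(**event_values)

    pieces = [
        u'Device ID: {device_id}',
        u'Type: {device_class}',
        u'[{family_id}]',
        u'Connected {use_count} times',
        u'Serial nr: {serial_number}',
        u'IMEI [{imei}]']

    # Hypothetical event without family_id and imei: those pieces vanish.
    event_values = {
        'device_id': u'0000A11300000000', 'device_class': u'iPod',
        'use_count': 4, 'serial_number': u'8K9AB12C'}

    print conditional_format(pieces, event_values)
    # Device ID: 0000A11300000000 Type: iPod Connected 4 times Serial nr: 8K9AB12C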
+"""Formatter for Java Cache IDX events.""" + +from plaso.formatters import interface + + +class JavaIDXFormatter(interface.ConditionalEventFormatter): + """Formatter for a Java Cache IDX download item.""" + + DATA_TYPE = 'java:download:idx' + + SOURCE_LONG = 'Java Cache IDX' + SOURCE_SHORT = 'JAVA_IDX' + + FORMAT_STRING_PIECES = [ + u'IDX Version: {idx_version}', + u'Host IP address: ({ip_address})', + u'Download URL: {url}'] diff --git a/plaso/formatters/ls_quarantine.py b/plaso/formatters/ls_quarantine.py new file mode 100644 index 0000000..59a8f3a --- /dev/null +++ b/plaso/formatters/ls_quarantine.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the Mac OS X launch services quarantine events.""" + +from plaso.formatters import interface + + +class LSQuarantineFormatter(interface.ConditionalEventFormatter): + """Formatter for a LS Quarantine history event.""" + + DATA_TYPE = 'macosx:lsquarantine' + + FORMAT_STRING_PIECES = [ + u'[{agent}]', + u'Downloaded: {url}', + u'<{data}>'] + + FORMAT_STRING_SHORT_PIECES = [u'{url}'] + + SOURCE_LONG = 'LS Quarantine Event' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/mac_appfirewall.py b/plaso/formatters/mac_appfirewall.py new file mode 100644 index 0000000..66b4a77 --- /dev/null +++ b/plaso/formatters/mac_appfirewall.py @@ -0,0 +1,39 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for the Mac appfirewall.log file.""" + +from plaso.formatters import interface + +class MacAppFirewallLogFormatter(interface.ConditionalEventFormatter): + """Formatter for Mac appfirewall.log file.""" + + DATA_TYPE = 'mac:asl:appfirewall:line' + + FORMAT_STRING_PIECES = [ + u'Computer: {computer_name}', + u'Agent: {agent}', + u'Status: {status}', + u'Process name: {process_name}', + u'Log: {action}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Process name: {process_name}', + u'Status: {status}'] + + SOURCE_LONG = 'Mac AppFirewall Log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/mac_document_versions.py b/plaso/formatters/mac_document_versions.py new file mode 100644 index 0000000..9317f74 --- /dev/null +++ b/plaso/formatters/mac_document_versions.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a formatter for the Mac OS X Document Versions files.""" + +from plaso.formatters import interface + + +class MacDocumentVersionsFormatter(interface.ConditionalEventFormatter): + """The event formatter for page visited data in Document Versions.""" + + DATA_TYPE = 'mac:document_versions:file' + + FORMAT_STRING_PIECES = [ + u'Version of [{name}]', + u'({path})', + u'stored in {version_path}', + u'by {user_sid}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Stored a document version of [{name}]'] + + SOURCE_LONG = 'Document Versions' + SOURCE_SHORT = 'HISTORY' diff --git a/plaso/formatters/mac_keychain.py b/plaso/formatters/mac_keychain.py new file mode 100644 index 0000000..ee70616 --- /dev/null +++ b/plaso/formatters/mac_keychain.py @@ -0,0 +1,53 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for the Keychain password database file.""" + +from plaso.formatters import interface + + +class KeychainApplicationRecordFormatter(interface.ConditionalEventFormatter): + """Formatter for keychain application record event.""" + + DATA_TYPE = 'mac:keychain:application' + + FORMAT_STRING_PIECES = [ + u'Name: {entry_name}', + u'Account: {account_name}'] + + FORMAT_STRING_SHORT_PIECES = [u'{entry_name}'] + + SOURCE_LONG = 'Keychain Application password' + SOURCE_SHORT = 'LOG' + + +class KeychainInternetRecordFormatter(interface.ConditionalEventFormatter): + """Formatter for keychain internet record event.""" + + DATA_TYPE = 'mac:keychain:internet' + + FORMAT_STRING_PIECES = [ + u'Name: {entry_name}', + u'Account: {account_name}', + u'Where: {where}', + u'Protocol: {protocol}', + u'({type_protocol})'] + + FORMAT_STRING_SHORT_PIECES = [u'{entry_name}'] + + SOURCE_LONG = 'Keychain Internet password' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/mac_securityd.py b/plaso/formatters/mac_securityd.py new file mode 100644 index 0000000..3c25fd2 --- /dev/null +++ b/plaso/formatters/mac_securityd.py @@ -0,0 +1,39 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for ASL securityd log file.""" + +from plaso.formatters import interface + + +class MacSecuritydLogFormatter(interface.ConditionalEventFormatter): + """Formatter for ASL Securityd file.""" + + DATA_TYPE = 'mac:asl:securityd:line' + + FORMAT_STRING_PIECES = [ + u'Sender: {sender}', + u'({sender_pid})', + u'Level: {level}', + u'Facility: {facility}', + u'Text: {message}'] + + FORMAT_STRING_SHORT_PIECES = [u'Text: {message}'] + + SOURCE_LONG = 'Mac ASL Securityd Log' + SOURCE_SHORT = 'LOG' + diff --git a/plaso/formatters/mac_wifi.py b/plaso/formatters/mac_wifi.py new file mode 100644 index 0000000..a5415c5 --- /dev/null +++ b/plaso/formatters/mac_wifi.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for Mac wifi.log file.""" + +from plaso.formatters import interface + + +class MacWifiLogFormatter(interface.ConditionalEventFormatter): + """Formatter for Mac Wifi file.""" + + DATA_TYPE = 'mac:wifilog:line' + + FORMAT_STRING_PIECES = [ + u'Action: {action}', + u'Agent: {user}', + u'({function})', + u'Log: {text}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Action: {action}'] + + SOURCE_LONG = 'Mac Wifi Log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/mackeeper_cache.py b/plaso/formatters/mackeeper_cache.py new file mode 100644 index 0000000..8175bdd --- /dev/null +++ b/plaso/formatters/mackeeper_cache.py @@ -0,0 +1,35 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a MacKeepr Cache formatter in plaso.""" + +from plaso.formatters import interface + + +class MacKeeperCacheFormatter(interface.ConditionalEventFormatter): + """Formatter for MacKeeper Cache extracted events.""" + + DATA_TYPE = 'mackeeper:cache' + + FORMAT_STRING_PIECES = [ + u'{description}', u'<{event_type}>', u':', u'{text}', u'[', + u'URL: {url}', u'Event ID: {record_id}', 'Room: {room}', u']'] + + FORMAT_STRING_SHORT_PIECES = [u'<{event_type}>', u'{text}'] + + SOURCE_LONG = 'MacKeeper Cache' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/mactime.py b/plaso/formatters/mactime.py new file mode 100644 index 0000000..1bc2c87 --- /dev/null +++ b/plaso/formatters/mactime.py @@ -0,0 +1,32 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the Sleuthkit (TSK) bodyfile or mactime format.""" + +from plaso.formatters import interface + + +class MactimeFormatter(interface.EventFormatter): + """Class that formats mactime bodyfile events.""" + + DATA_TYPE = 'fs:mactime:line' + + # The format string. + FORMAT_STRING = u'{filename}' + + SOURCE_LONG = 'Mactime Bodyfile' + SOURCE_SHORT = 'FILE' diff --git a/plaso/formatters/manager.py b/plaso/formatters/manager.py new file mode 100644 index 0000000..a0d16ff --- /dev/null +++ b/plaso/formatters/manager.py @@ -0,0 +1,140 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the event formatters manager class."""
+
+import logging
+
+from plaso.formatters import interface
+from plaso.lib import utils
+
+
+class DefaultFormatter(interface.EventFormatter):
+  """Default formatter for events that do not have any defined formatter."""
+
+  DATA_TYPE = u'event'
+  FORMAT_STRING = u' Attributes: {attribute_driven}'
+  FORMAT_STRING_SHORT = u' {attribute_driven}'
+
+  def GetMessages(self, event_object):
+    """Return a list of messages extracted from an event object."""
+    text_pieces = []
+
+    for key, value in event_object.GetValues().items():
+      if key in utils.RESERVED_VARIABLES:
+        continue
+      text_pieces.append(u'{0:s}: {1!s}'.format(key, value))
+
+    event_object.attribute_driven = u' '.join(text_pieces)
+    # Due to the way the default formatter behaves it requires the data_type
+    # to be set as 'event', otherwise it will complain and deny processing
+    # the event.
+    # TODO: Change this behavior and allow the default formatter to accept
+    # arbitrary data types (as it should).
+    old_data_type = getattr(event_object, 'data_type', None)
+    event_object.data_type = self.DATA_TYPE
+    msg, msg_short = super(DefaultFormatter, self).GetMessages(event_object)
+    event_object.data_type = old_data_type
+    return msg, msg_short
+
+
+class EventFormatterManager(object):
+  """Class to manage the event formatters."""
+
+  @classmethod
+  def GetFormatter(cls, event_object):
+    """Retrieves the formatter for a specific event object.
+
+    This function builds a map of data types and the corresponding event
+    formatters. At the moment this map is only built once.
+
+    Args:
+      event_object: The event object (EventObject) which is used to identify
+                    the formatter.
+
+    Returns:
+      The corresponding formatter (EventFormatter) if available or None.
+
+    Raises:
+      RuntimeError: if a duplicate event formatter is found while building
+                    the map of event formatters.
+    """
+    if not hasattr(cls, 'event_formatters'):
+      cls.event_formatters = {}
+      cls.default_formatter = DefaultFormatter()
+      for cls_formatter in interface.EventFormatter.classes:
+        try:
+          formatter = interface.EventFormatter.classes[cls_formatter]()
+
+          # Raise on duplicate formatters.
+          if formatter.DATA_TYPE in cls.event_formatters:
+            raise RuntimeError((
+                u'event formatter for data type: {0:s} defined in: {1:s} and '
+                u'{2:s}.').format(
+                    formatter.DATA_TYPE, cls_formatter,
+                    cls.event_formatters[
+                        formatter.DATA_TYPE].__class__.__name__))
+          cls.event_formatters[formatter.DATA_TYPE] = formatter
+        except RuntimeError as exception:
+          # Ignore broken formatters.
+          logging.warning(u'{0:s}'.format(exception))
+
+      cls.event_formatters.setdefault(None)
+
+    if event_object.data_type in cls.event_formatters:
+      return cls.event_formatters[event_object.data_type]
+    else:
+      logging.warning(
+          u'Using default formatter for data type: {0:s}'.format(
+              event_object.data_type))
+      return cls.default_formatter
+
+  @classmethod
+  def GetMessageStrings(cls, event_object):
+    """Retrieves the formatted message strings for a specific event object.
+
+    Args:
+      event_object: The event object (EventObject) which is used to identify
+                    the formatter.
+
+    Returns:
+      A list that contains both the longer and shorter version of the message
+      string.
+    """
+    formatter = cls.GetFormatter(event_object)
+    if not formatter:
+      return u'', u''
+    return formatter.GetMessages(event_object)
+
+  @classmethod
+  def GetSourceStrings(cls, event_object):
+    """Retrieves the formatted source long and short strings for an event.
+
+    Args:
+      event_object: The event object (EventObject) which is used to identify
+                    the formatter.
+
+    Returns:
+      A list that contains the source_short and source_long version of the
+      event.
+    """
+    # TODO: change this to return the long variant first so it is consistent
+    # with GetMessageStrings.
+    formatter = cls.GetFormatter(event_object)
+    if not formatter:
+      return u'', u''
+    return formatter.GetSources(event_object)
diff --git a/plaso/formatters/manager_test.py b/plaso/formatters/manager_test.py
new file mode 100644
index 0000000..608d068
--- /dev/null
+++ b/plaso/formatters/manager_test.py
@@ -0,0 +1,163 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a unit test for the event formatters."""
+
+import unittest
+
+from plaso.formatters import interface
+from plaso.formatters import manager
+from plaso.formatters import winreg  # pylint: disable=unused-import
+from plaso.lib import event_test
+
+
+class TestEvent1Formatter(interface.EventFormatter):
+  """Test event 1 formatter."""
+  DATA_TYPE = 'test:event1'
+  FORMAT_STRING = u'{text}'
+
+  SOURCE_SHORT = 'FILE'
+  SOURCE_LONG = 'Weird Log File'
+
+
+class WrongEventFormatter(interface.EventFormatter):
+  """A simple event formatter."""
+  DATA_TYPE = 'test:wrong'
+  FORMAT_STRING = u'This format string does not match {body}.'
+ + SOURCE_SHORT = 'FILE' + SOURCE_LONG = 'Weird Log File' + + +class EventFormatterUnitTest(unittest.TestCase): + """The unit test for the event formatter.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._formatters_manager = manager.EventFormatterManager + self.event_objects = event_test.GetEventObjects() + + def GetCSVLine(self, event_object): + """Takes an EventObject and prints out a simple CSV line from it.""" + try: + msg, _ = self._formatters_manager.GetMessageStrings(event_object) + source_short, source_long = self._formatters_manager.GetSourceStrings( + event_object) + except KeyError: + print event_object.GetAttributes() + return u'{0:d},{1:s},{2:s},{3:s}'.format( + event_object.timestamp, source_short, source_long, msg) + + def testInitialization(self): + """Test the initialization.""" + self.assertTrue(TestEvent1Formatter()) + + def testAttributes(self): + """Test if we can read the event attributes correctly.""" + events = {} + for event_object in self.event_objects: + events[self.GetCSVLine(event_object)] = True + + self.assertIn(( + u'1334961526929596,REG,UNKNOWN key,[MY AutoRun key] Run: ' + u'c:/Temp/evil.exe'), events) + + self.assertIn( + (u'1334966206929596,REG,UNKNOWN key,[//HKCU/Secret/EvilEmpire/' + u'Malicious_key] Value: send all the exes to the other ' + u'world'), events) + self.assertIn((u'1334940286000000,REG,UNKNOWN key,[//HKCU/Windows' + u'/Normal] Value: run all the benign stuff'), events) + self.assertIn((u'1335781787929596,FILE,Weird Log File,This log line reads ' + u'ohh so much.'), events) + self.assertIn((u'1335781787929596,FILE,Weird Log File,Nothing of interest' + u' here, move on.'), events) + self.assertIn((u'1335791207939596,FILE,Weird Log File,Mr. Evil just logged' + u' into the machine and got root.'), events) + + def testTextBasedEvent(self): + """Test a text based event.""" + for event_object in self.event_objects: + source_short, _ = self._formatters_manager.GetSourceStrings(event_object) + if source_short == 'LOG': + msg, msg_short = self._formatters_manager.GetMessageStrings( + event_object) + + self.assertEquals(msg, ( + u'This is a line by someone not reading the log line properly. And ' + u'since this log line exceeds the accepted 80 chars it will be ' + u'shortened.')) + self.assertEquals(msg_short, ( + u'This is a line by someone not reading the log line properly. ' + u'And since this l...')) + + +class ConditionalTestEvent1(event_test.TestEvent1): + DATA_TYPE = 'test:conditional_event1' + + +class ConditionalTestEvent1Formatter(interface.ConditionalEventFormatter): + """Test event 1 conditional (event) formatter.""" + DATA_TYPE = 'test:conditional_event1' + FORMAT_STRING_PIECES = [ + u'Description: {description}', + u'Comment', + u'Value: 0x{numeric:02x}', + u'Optional: {optional}', + u'Text: {text}'] + + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'Some Text File.' + + +class BrokenConditionalEventFormatter(interface.ConditionalEventFormatter): + """A broken conditional event formatter.""" + DATA_TYPE = 'test:broken_conditional' + FORMAT_STRING_PIECES = [u'{too} {many} formatting placeholders'] + + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'Some Text File.' 
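EventFormatterManager.GetFormatter, shown in manager.py above, builds the data_type to formatter map once, raises on duplicate DATA_TYPE registrations, and falls back to DefaultFormatter for unknown types. A minimal sketch of the fallback path, assuming the plaso package from this commit is importable and that, as in the tests here, EventObject attributes can be assigned freely:

    from plaso.formatters import manager
    from plaso.lib import event

    # Hypothetical event with a data type for which no formatter is
    # registered.
    event_object = event.EventObject()
    event_object.data_type = 'test:no:formatter'
    event_object.offset = 12
    event_object.text = u'some text'

    # Logs 'Using default formatter ...' and renders the non-reserved
    # attributes as 'key: value' pairs via DefaultFormatter.
    msg, msg_short = manager.EventFormatterManager.GetMessageStrings(
        event_object)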
+ + +class ConditionalEventFormatterUnitTest(unittest.TestCase): + """The unit test for the conditional event formatter.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self.event_object = ConditionalTestEvent1(1335791207939596, { + 'numeric': 12, 'description': 'this is beyond words', + 'text': 'but we\'re still trying to say something about the event'}) + + def testInitialization(self): + """Test the initialization.""" + self.assertTrue(ConditionalTestEvent1Formatter()) + with self.assertRaises(RuntimeError): + BrokenConditionalEventFormatter() + + def testGetMessages(self): + """Test get messages.""" + event_formatter = ConditionalTestEvent1Formatter() + msg, _ = event_formatter.GetMessages(self.event_object) + + expected_msg = ( + u'Description: this is beyond words Comment Value: 0x0c ' + u'Text: but we\'re still trying to say something about the event') + self.assertEquals(msg, expected_msg) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/formatters/mcafeeav.py b/plaso/formatters/mcafeeav.py new file mode 100644 index 0000000..a66663f --- /dev/null +++ b/plaso/formatters/mcafeeav.py @@ -0,0 +1,34 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the McAfee AV Logs files.""" + +from plaso.formatters import interface + + +class McafeeAccessProtectionLogEventFormatter(interface.EventFormatter): + """Class that formats the McAfee Access Protection Log events.""" + + DATA_TYPE = 'av:mcafee:accessprotectionlog' + + # The format string. + FORMAT_STRING = (u'File Name: {filename} User: {username} {trigger_location} ' + u'{status} {rule} {action}') + FORMAT_STRING_SHORT = u'{filename} {action}' + + SOURCE_LONG = 'McAfee Access Protection Log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/msie_webcache.py b/plaso/formatters/msie_webcache.py new file mode 100644 index 0000000..06cef60 --- /dev/null +++ b/plaso/formatters/msie_webcache.py @@ -0,0 +1,99 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatters for the MSIE WebCache ESE database events.""" + +from plaso.formatters import interface + + +class MsieWebCacheContainerEventFormatter(interface.ConditionalEventFormatter): + """Formatter for a MSIE WebCache ESE database Container_# table record.""" + + DATA_TYPE = 'msie:webcache:container' + + FORMAT_STRING_PIECES = [ + u'Entry identifier: {entry_identifier}', + u'Container identifier: {container_identifier}', + u'Cache identifier: {cache_identifier}', + u'URL: {url}', + u'Redirect URL: {redirect_url}', + u'Access count: {access_count}', + u'Sync count: {sync_count}', + u'Filename: {cached_filename}', + u'File extension: {file_extension}', + u'Cached file size: {cached_file_size}', + u'Request headers: {request_headers}', + u'Response headers: {response_headers}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'URL: {url}'] + + SOURCE_LONG = 'MSIE WebCache container record' + SOURCE_SHORT = 'WEBHIST' + + +class MsieWebCacheContainersEventFormatter(interface.ConditionalEventFormatter): + """Formatter for a MSIE WebCache ESE database Containers table record.""" + + DATA_TYPE = 'msie:webcache:containers' + + FORMAT_STRING_PIECES = [ + u'Container identifier: {container_identifier}', + u'Set identifier: {set_identifier}', + u'Name: {name}', + u'Directory: {directory}', + u'Table: Container_{container_identifier}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Directory: {directory}'] + + SOURCE_LONG = 'MSIE WebCache containers record' + SOURCE_SHORT = 'WEBHIST' + + +class MsieWebCacheLeakFilesEventFormatter(interface.ConditionalEventFormatter): + """Formatter for a MSIE WebCache ESE database LeakFiles table record.""" + + DATA_TYPE = 'msie:webcache:leak_file' + + FORMAT_STRING_PIECES = [ + u'Leak identifier: {leak_identifier}', + u'Filename: {cached_filename}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Filename: {cached_filename}'] + + SOURCE_LONG = 'MSIE WebCache partitions record' + SOURCE_SHORT = 'WEBHIST' + + +class MsieWebCachePartitionsEventFormatter(interface.ConditionalEventFormatter): + """Formatter for a MSIE WebCache ESE database Partitions table record.""" + + DATA_TYPE = 'msie:webcache:partitions' + + FORMAT_STRING_PIECES = [ + u'Partition identifier: {partition_identifier}', + u'Partition type: {partition_type}', + u'Directory: {directory}', + u'Table identifier: {table_identifier}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Directory: {directory}'] + + SOURCE_LONG = 'MSIE WebCache partitions record' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/msiecf.py b/plaso/formatters/msiecf.py new file mode 100644 index 0000000..f55a898 --- /dev/null +++ b/plaso/formatters/msiecf.py @@ -0,0 +1,65 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for Microsoft Internet Explorer (MSIE) Cache Files (CF) events.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class MsiecfUrlFormatter(interface.ConditionalEventFormatter): + """Formatter for a MSIECF URL item.""" + + DATA_TYPE = 'msiecf:url' + + FORMAT_STRING_PIECES = [ + u'Location: {url}', + u'Number of hits: {number_of_hits}', + u'Cached file size: {cached_file_size}', + u'HTTP headers: {http_headers_cleaned}', + u'{recovered_string}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Location: {url}'] + + SOURCE_LONG = 'MSIE Cache File URL record' + SOURCE_SHORT = 'WEBHIST' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + if hasattr(event_object, 'http_headers'): + event_object.http_headers_cleaned = event_object.http_headers.replace( + '\r\n', ' - ') + # TODO: Could this be moved upstream since this is done in other parsers + # as well? + if getattr(event_object, 'recovered', None): + event_object.recovered_string = '[Recovered Entry]' + + return super(MsiecfUrlFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/olecf.py b/plaso/formatters/olecf.py new file mode 100644 index 0000000..d2d4f1a --- /dev/null +++ b/plaso/formatters/olecf.py @@ -0,0 +1,149 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatters for OLE Compound File (OLECF) events.""" + +from plaso.formatters import interface +from plaso.lib import errors + + +class OleCfItemFormatter(interface.EventFormatter): + """Formatter for an OLECF item.""" + + DATA_TYPE = 'olecf:item' + + FORMAT_STRING = u'Name: {name}' + FORMAT_STRING_SHORT = u'Name: {name}' + + SOURCE_LONG = 'OLECF Item' + SOURCE_SHORT = 'OLECF' + + +class OleCfDestListEntryFormatter(interface.ConditionalEventFormatter): + """Formatter for an OLECF DestList stream.""" + + DATA_TYPE = 'olecf:dest_list:entry' + + FORMAT_STRING_PIECES = [ + u'Entry: {entry_number}', + u'Pin status: {pin_status_string}', + u'Hostname: {hostname}', + u'Path: {path}', + u'Droid volume identifier: {droid_volume_identifier}', + u'Droid file identifier: {droid_file_identifier}', + u'Birth droid volume identifier: {birth_droid_volume_identifier}', + u'Birth droid file identifier: {birth_droid_file_identifier}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Entry: {entry_number}', + u'Pin status: {pin_status_string}', + u'Path: {path}'] + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. 
+
+    Args:
+      event_object: The event object (EventObject) containing the event
+                    specific data.
+
+    Returns:
+      A list that contains both the longer and shorter version of the message
+      string.
+    """
+    if self.DATA_TYPE != event_object.data_type:
+      raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format(
+          event_object.data_type))
+
+    pin_status = getattr(event_object, 'pin_status', None)
+    if pin_status == 0xffffffff:
+      event_object.pin_status_string = u'Unpinned'
+    else:
+      event_object.pin_status_string = u'Pinned'
+
+    return super(OleCfDestListEntryFormatter, self).GetMessages(event_object)
+
+
+class OleCfDocumentSummaryInfoFormatter(interface.ConditionalEventFormatter):
+  """Formatter for an OLECF Document Summary Info property set stream."""
+
+  DATA_TYPE = 'olecf:document_summary_info'
+
+  FORMAT_STRING_PIECES = [
+      u'Number of bytes: {number_of_bytes}',
+      u'Number of lines: {number_of_lines}',
+      u'Number of paragraphs: {number_of_paragraphs}',
+      u'Number of slides: {number_of_slides}',
+      u'Number of notes: {number_of_notes}',
+      u'Number of hidden slides: {number_of_hidden_slides}',
+      u'Number of multi-media clips: {number_of_clips}',
+      u'Company: {company}',
+      u'Manager: {manager}',
+      u'Shared document: {shared_document}',
+      u'Application version: {application_version}',
+      u'Content type: {content_type}',
+      u'Content status: {content_status}',
+      u'Language: {language}',
+      u'Document version: {document_version}']
+
+  # TODO: add support for the following properties.
+  # u'Digital signature: {digital_signature}',
+
+  FORMAT_STRING_SHORT_PIECES = [
+      u'Company: {company}']
+
+  SOURCE_LONG = 'OLECF Document Summary Info'
+  SOURCE_SHORT = 'OLECF'
+
+
+class OleCfSummaryInfoFormatter(interface.ConditionalEventFormatter):
+  """Formatter for an OLECF Summary Info property set stream."""
+
+  DATA_TYPE = 'olecf:summary_info'
+
+  FORMAT_STRING_PIECES = [
+      u'Title: {title}',
+      u'Subject: {subject}',
+      u'Author: {author}',
+      u'Keywords: {keywords}',
+      u'Comments: {comments}',
+      u'Template: {template}',
+      u'Revision number: {revision_number}',
+      u'Last saved by: {last_saved_by}',
+      u'Total edit time: {total_edit_time}',
+      u'Number of pages: {number_of_pages}',
+      u'Number of words: {number_of_words}',
+      u'Number of characters: {number_of_characters}',
+      u'Application: {application}',
+      u'Security: {security}']
+
+  FORMAT_STRING_SHORT_PIECES = [
+      u'Title: {title}',
+      u'Subject: {subject}',
+      u'Author: {author}',
+      u'Revision number: {revision_number}']
+
+  SOURCE_LONG = 'OLECF Summary Info'
+  SOURCE_SHORT = 'OLECF'
+
+  # TODO: add a function to print the security as a descriptive string.
+  _SECURITY_VALUES = {
+      0x00000001: 'Password protected',
+      0x00000002: 'Read-only recommended',
+      0x00000004: 'Read-only enforced',
+      0x00000008: 'Locked for annotations',
+      }
+
diff --git a/plaso/formatters/opera.py b/plaso/formatters/opera.py
new file mode 100644
index 0000000..5eb1b97
--- /dev/null
+++ b/plaso/formatters/opera.py
@@ -0,0 +1,47 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Opera history events.""" + +from plaso.formatters import interface + + +class OperaGlobalHistoryFormatter(interface.ConditionalEventFormatter): + """Formatter for an Opera global history event.""" + + DATA_TYPE = 'opera:history:entry' + + FORMAT_STRING_PIECES = [ + u'{url}', + u'({title})', + u'[{description}]'] + + SOURCE_LONG = 'Opera Browser History' + SOURCE_SHORT = 'WEBHIST' + + +class OperaTypedHistoryFormatter(interface.ConditionalEventFormatter): + """Formatter for an Opera typed history event.""" + + DATA_TYPE = 'opera:history:typed_entry' + + FORMAT_STRING_PIECES = [ + u'{url}', + u'({entry_selection})'] + + SOURCE_LONG = 'Opera Browser History' + SOURCE_SHORT = 'WEBHIST' diff --git a/plaso/formatters/oxml.py b/plaso/formatters/oxml.py new file mode 100644 index 0000000..177b437 --- /dev/null +++ b/plaso/formatters/oxml.py @@ -0,0 +1,67 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
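
OperaGlobalHistoryFormatter above only renders '({title})' or '[{description}]' when those attributes exist on the event. A rough standalone sketch of how a conditional formatter can select pieces; plaso's actual ConditionalEventFormatter in plaso/formatters/interface.py differs in detail:

import string

FORMAT_STRING_PIECES = [u'{url}', u'({title})', u'[{description}]']

def build_message(pieces, event_values, separator=u' '):
    # Keep a piece only when every placeholder it references is present.
    parser = string.Formatter()
    kept = []
    for piece in pieces:
        names = [name for _, name, _, _ in parser.parse(piece) if name]
        if all(name in event_values for name in names):
            kept.append(piece.format(**event_values))
    return separator.join(kept)

print(build_message(FORMAT_STRING_PIECES,
                    {u'url': u'http://example.com/', u'title': u'Example'}))
# -> http://example.com/ (Example)   (no description, so '[...]' is dropped)
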
+"""Formatter for OpenXML events.""" + +from plaso.formatters import interface + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class OpenXMLParserFormatter(interface.ConditionalEventFormatter): + """Formatter for OXML events.""" + + DATA_TYPE = 'metadata:openxml' + + FORMAT_STRING_PIECES = [ + u'Creating App: {creating_app}', + u'App version: {app_version}', + u'Title: {title}', + u'Subject: {subject}', + u'Last saved by: {last_saved_by}', + u'Author: {author}', + u'Total edit time (secs): {total_edit_time}', + u'Keywords: {keywords}', + u'Comments: {comments}', + u'Revision Num: {revision_num}', + u'Template: {template}', + u'Num pages: {num_pages}', + u'Num words: {num_words}', + u'Num chars: {num_chars}', + u'Num chars with spaces: {num_chars_w_spaces}', + u'Num lines: {num_lines}', + u'Company: {company}', + u'Manager: {manager}', + u'Shared: {shared}', + u'Security: {security}', + u'Hyperlinks changed: {hyperlinks_changed}', + u'Links up to date: {links_up_to_date}', + u'Scale crop: {scale_crop}', + u'Digital signature: {dig_sig}', + u'Slides: {slides}', + u'Hidden slides: {hidden_slides}', + u'Presentation format: {presentation_format}', + u'MM clips: {mm_clips}', + u'Notes: {notes}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Title: {title}', + u'Subject: {subject}', + u'Author: {author}'] + + SOURCE_LONG = 'Open XML Metadata' + SOURCE_SHORT = 'META' diff --git a/plaso/formatters/pcap.py b/plaso/formatters/pcap.py new file mode 100644 index 0000000..9e67dbd --- /dev/null +++ b/plaso/formatters/pcap.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for PCAP files.""" + +from plaso.formatters import interface + + +__author__ = 'Dominique Kilman (lexistar97@gmail.com)' + + +class PCAPFormatter(interface.ConditionalEventFormatter): + """Define the formatting PCAP record.""" + + DATA_TYPE = 'metadata:pcap' + + FORMAT_STRING_PIECES = [ + u'Source IP: {source_ip}', + u'Destination IP: {dest_ip}', + u'Source Port: {source_port}', + u'Destination Port: {dest_port}', + u'Protocol: {protocol}', + u'Type: {stream_type}', + u'Size: {size}', + u'Protocol Data: {protocol_data}', + u'Stream Data: {stream_data}', + u'First Packet ID: {first_packet_id}', + u'Last Packet ID: {last_packet_id}', + u'Packet Count: {packet_count}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Type: {stream_type}', + u'First Packet ID: {first_packet_id}'] + + SOURCE_LONG = 'Packet Capture File (pcap)' + SOURCE_SHORT = 'PCAP' diff --git a/plaso/formatters/plist.py b/plaso/formatters/plist.py new file mode 100644 index 0000000..854451c --- /dev/null +++ b/plaso/formatters/plist.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a formatter for Plist Events."""
+
+from plaso.formatters import interface
+
+
+class PlistFormatter(interface.ConditionalEventFormatter):
+  """Event Formatter for plist keys."""
+
+  DATA_TYPE = 'plist:key'
+
+  FORMAT_STRING_SEPARATOR = u''
+
+  FORMAT_STRING_PIECES = [
+      u'{root}/',
+      u'{key}',
+      u' {desc}']
+
+  SOURCE_LONG = 'Plist Entry'
+  SOURCE_SHORT = 'PLIST'
diff --git a/plaso/formatters/pls_recall.py b/plaso/formatters/pls_recall.py
new file mode 100644
index 0000000..44e011e
--- /dev/null
+++ b/plaso/formatters/pls_recall.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Formatter for PL-Sql Recall events."""
+
+from plaso.formatters import interface
+
+
+class PlsRecallFormatter(interface.EventFormatter):
+  """Formatter for a PL-Sql Recall file container."""
+  DATA_TYPE = 'PLSRecall:event'
+  SOURCE_LONG = 'PL-Sql Developer Recall file'
+  SOURCE_SHORT = 'PLSRecall'
+
+  # The format string.
+  FORMAT_STRING = (u'Sequence #{sequence} User: {username} '
+                   u'Database Name: {database_name} Query: {query}')
+  FORMAT_STRING_SHORT = u'{sequence} {username} {database_name} {query}'
+
diff --git a/plaso/formatters/popcontest.py b/plaso/formatters/popcontest.py
new file mode 100644
index 0000000..3b12227
--- /dev/null
+++ b/plaso/formatters/popcontest.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
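
PlistFormatter above sets FORMAT_STRING_SEPARATOR to the empty string, so its three pieces concatenate into one path-like message instead of being space-joined. A standalone illustration with made-up values:

pieces = [u'{root}/', u'{key}', u' {desc}']
values = {
    u'root': u'/DeviceCache/44-00-00-00-00-00',
    u'key': u'LastInquiryUpdate',
    u'desc': u'Bluetooth device last inquiry',
}
# Empty separator: the pieces themselves carry the '/' and ' ' glue.
message = u''.join(piece.format(**values) for piece in pieces)
assert message == (u'/DeviceCache/44-00-00-00-00-00/LastInquiryUpdate '
                   u'Bluetooth device last inquiry')
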
+"""Formatter for the Popularity Contest parser events.""" + +from plaso.formatters import interface + + +class PopularityContestSessionFormatter(interface.ConditionalEventFormatter): + """Formatter for Popularity Contest Session information.""" + + DATA_TYPE = 'popularity_contest:session:event' + + FORMAT_STRING_PIECES = [ + u'Session {session}', + u'{status}', + u'ID {hostid}', + u'[{details}]'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Session {session}', + u'{status}'] + + SOURCE_LONG = 'Popularity Contest Session' + SOURCE_SHORT = 'LOG' + + +class PopularityContestLogFormatter(interface.ConditionalEventFormatter): + """Formatter for Popularity Contest Log events.""" + + DATA_TYPE = 'popularity_contest:log:event' + + FORMAT_STRING_PIECES = [ + u'mru [{mru}]', + u'package [{package}]', + u'tag [{record_tag}]'] + + FORMAT_STRING_SHORT_PIECES = [u'{mru}'] + + SOURCE_LONG = 'Popularity Contest Log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/recycler.py b/plaso/formatters/recycler.py new file mode 100644 index 0000000..48ecb0a --- /dev/null +++ b/plaso/formatters/recycler.py @@ -0,0 +1,82 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the Windows recycle files.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class WinRecyclerFormatter(interface.ConditionalEventFormatter): + """Formatter for Windows recycle bin events.""" + + DATA_TYPE = 'windows:metadata:deleted_item' + + DRIVE_LIST = { + 0x00: 'A', + 0x01: 'B', + 0x02: 'C', + 0x03: 'D', + 0x04: 'E', + 0x05: 'F', + 0x06: 'G', + 0x07: 'H', + 0x08: 'I', + 0x09: 'J', + 0x0A: 'K', + 0x0B: 'L', + 0x0C: 'M', + 0x0D: 'N', + 0x0E: 'O', + 0x0F: 'P', + 0x10: 'Q', + 0x11: 'R', + 0x12: 'S', + 0x13: 'T', + 0x14: 'U', + 0x15: 'V', + 0x16: 'W', + 0x17: 'X', + 0x18: 'Y', + 0x19: 'Z', + } + + # The format string. + FORMAT_STRING_PIECES = [ + u'DC{index} ->', + u'{orig_filename}', + u'[{orig_filename_legacy}]', + u'(from drive {drive_letter})'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Deleted file: {orig_filename}'] + + SOURCE_LONG = 'Recycle Bin' + SOURCE_SHORT = 'RECBIN' + + def GetMessages(self, event_object): + """Return the message strings.""" + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter('Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + if hasattr(event_object, 'drive_number'): + event_object.drive_letter = self.DRIVE_LIST.get( + event_object.drive_number, 'C?') + + return super(WinRecyclerFormatter, self).GetMessages(event_object) + diff --git a/plaso/formatters/rubanetra.py b/plaso/formatters/rubanetra.py new file mode 100755 index 0000000..fff002b --- /dev/null +++ b/plaso/formatters/rubanetra.py @@ -0,0 +1,422 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This file contains formatters for the parsed Rubanetra events. Additionally, a Java Instant formatter was defined +as well.""" +from plaso.formatters import interface + +__author__ = 'Stefan Swerk (stefan_rubanetra@swerk.priv.at)' + + +class RubanetraBaseActivityFormatter(interface.ConditionalEventFormatter): + """ Formatter for a Rubanetra BaseActivity """ + + DATA_TYPE = 'java:rubanetra:base_activity' + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'at.jku.fim.rubanetra.BaseActivity' + + FORMAT_STRING_PIECES = [ + u'activityType: \'{activity_type}\'', + u'firstTimestamp: \'{first_timestamp}\'', + u'lastTimestamp: \'{last_timestamp}\'', + u'description: \'{description}\'', + u'sourceAddress: \'{source_address}\'', + u'destinationAddress: \'{destination_address}\'', + u'compoundFrameNumbers: \'{compound_frame_number_list}\'', + u'isReplaced: \'{replaced}\'', + u'optionalFields: \'{optional_field_dict}\''] + + +class RubanetraPcapActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:pcap_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.PcapActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES \ + + [u'totalSize: \'{pcap_total_size}\'', + u'frameNumber: \'{pcap_frame_number}\'', + u'wireLength: \'{pcap_packet_wirelen}\'', + u'headerCount: \'{pcap_header_count}\''] + + +class RubanetraHttpRequestActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:http_request_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.HttpRequestActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'serverAddress: \'{server_address}\'', + u'clientAddress: \'{client_address}\'', + u'httpVersion: \'{http_version}\'', + u'httpMethod: \'{http_method}\'', + u'httpQueryString: \'{http_query_string}\'', + u'httpQueryParameters: \'{http_query_parameters}\'', + u'httpRequestHeader: \'{http_request_header_dict}\'', + u'url: \'{url}\'', + u'originalHttpHeader: \'{orig_http_header}\'', + u'contentType: \'{content_type}\'', + u'isResponse: \'{is_response}\'', + u'JNetPcapHttpString: \'{jnetpcap_http_string}\''] + + +class RubanetraHttpResponseActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:http_response_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.HttpResponseActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'httpVersion: \'{http_version}\'', + u'httpStatusCode: \'{response_status_code}\'', + u'httpStatusLine: \'{response_status_line}\'', + u'httpResponseHeader: \'{response_header_dict}\'', + u'originalHttpHeader: \'{orig_http_header}\'', + u'contentType: \'{content_type}\'', + u'JNetPcapHttpString: \'{jnetpcap_http_string}\''] + + +class RubanetraDnsActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:dns_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.DnsActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + 
[u'questionRecords: \'{question_record_list}\'', + u'answerRecords: \'{answer_record_list}\'', + u'authorityRecords: \'{authority_record_list}\'', + u'additionalRecords: \'{additional_record_list}\'', + u'dnsMessageHeader: \'{dns_message_header}\'', + u'isResponse: \'{is_response_bool}\''] + + +class RubanetraHttpImageActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:http_image_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.HttpImageActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'imageType: \'{image_type}\'', + u'imagePath: \'{image_path}\''] + + +class RubanetraArpActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:arp_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.ArpActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'hardwareType: \'{hardware_type}\'', + u'protocolType: \'{protocol_type}\'', + u'hardwareAddressLength: \'{hardware_address_length}\'', + u'protocolAddressLength: \'{protocol_address_length}\'', + u'senderHardwareAddress: \'{sender_mac_address}\'', + u'targetHardwareAddress: \'{target_mac_address}\'', + u'senderProtocolAddress: \'{sender_protocol_address}\'', + u'targetProtocolAddress: \'{target_protocol_address}\'', + u'JNetPcapArpString: \'{jnetpcap_arp}\''] + + +class RubanetraDhcpActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:dhcp_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.DhcpActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'dhcpMessage: \'{dhcp_message}\''] + + +class RubanetraEthernetActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:ethernet_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.EthernetActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'sourceMacAddress: \'{source_mac_address}\'', + u'destinationMacAddress: \'{destination_mac_address}\'', + u'ethernetType: \'{ethernet_type}\'', + u'ethernetTypeEnum: \'{ethernet_type_enum}\'', + u'JNetPcapEthernetString: \'{jnetpcap_ethernet}\''] + + +class RubanetraFtpActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:ftp_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.FtpActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'ftpActivityType: \'{ftp_type}\'', + u'command: \'{command}\'', + u'reply: \'{reply}\'', + u'list: \'{list}\''] + + +class RubanetraIcmpv4ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:icmpv4_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Icmpv4Activity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'icmpSubType: \'{icmp_subtype}\'', + u'icmpPacket: \'{icmp_packet}\'', + u'icmpMessage: \'{icmp_message}\'', + u'icmpType: \'{icmp_type}\'', + u'icmpCode: \'{icmp_code}\'', + u'sourceAddress: \'{source_address}\'', + u'destinationAddress: \'{destination_address}\'', + u'identifier: \'{identifier}\'', + u'sequence: \'{sequence}\'', + u'JNetPcapIcmpString: \'{jnetpcap_icmp}\''] + + +class RubanetraIcmpv6ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:icmpv6_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Icmpv6Activity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'icmpSubType: \'{icmp_subtype}\'', + u'icmpPacket: \'{icmp_packet}\'', + u'icmpMessage: \'{icmp_message}\'', + 
u'icmpType: \'{icmp_type}\'', + u'JNetPcapIcmpString: \'{jnetpcap_icmp}\''] + + +class RubanetraIpActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:ip_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.IpActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'version: \'{version}\'', + u'protocol: \'{protocol}\'', + u'sourceAddress: \'{source_address}\'', + u'destinationAddress: \'{destination_address}\''] + + +class RubanetraIpv4ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:ipv4_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Ipv4Activity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'internetHeaderLength: \'{internet_header_length}\'', + u'differentiatedServicesCodePoint: \'{differentiated_services_code_point}\'', + u'totalLength: \'{total_length}\'', + u'identification: \'{identification}\'', + u'flags: \'{flags}\'', + u'fragmentOffset: \'{fragment_offset}\'', + u'timeToLive: \'{time_to_live}\'', + u'headerChecksum: \'{header_checksum}\'', + u'options: \'{options}\'', + u'JNetPcapIpv4String: \'{jnetpcap_ip4}\''] + + +class RubanetraIpv6ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:ipv6_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Ipv6Activity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'trafficClass: \'{traffic_class}\'', + u'flowLabel: \'{flow_label}\'', + u'payloadLength: \'{payload_length}\'', + u'nextHeader: \'{next_header}\'', + u'hopLimit: \'{hop_limit}\'', + u'JNetPcapIpv6String: \'{jnetpcap_ip6}\'', + u'KrakenIpv6String: \'{kraken_ip6}\''] + + +class RubanetraMsnActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:msn_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.MsnActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'account: \'{account}\'', + u'chat: \'{chat}\''] + + +class RubanetraNetbiosActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:Netbios_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.NetbiosActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'datagramPacket: \'{datagram_packet}\'', + u'namePacket: \'{name_packet}\''] + + +class RubanetraPop3ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:pop3_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Pop3Activity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'subType: \'{sub_type}\'', + u'header: \'{header}\'', + u'data: \'{data}\'', + u'command: \'{command}\'', + u'response: \'{response}\''] + + +class RubanetraSmtpCommandActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:smtp_command_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpCommandActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'command: \'{command}\'', + u'parameter: \'{parameter}\''] + + +class RubanetraSmtpReplyActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:smtp_reply_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpReplyActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'code: \'{code}\'', + u'message: \'{message}\''] + + +class RubanetraSmtpSendActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:smtp_send_activity' + 
SOURCE_LONG = 'at.jku.fim.rubanetra.SmtpSendActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'header: \'{header}\'', + u'data: \'{data}\''] + + +class RubanetraSnmpv1ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:snmpv1_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Snmpv1Activity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'pdu: \'{pdu}\'', + u'sourceSocketAddress: \'{source_socket_address}\'', + u'destinationSocketAddress: \'{destination_socket_address}\''] + + +class RubanetraSnmpv2ActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:snmpv2_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.Snmpv2Activity' + + FORMAT_STRING_PIECES = RubanetraSnmpv1ActivityFormatter.FORMAT_STRING_PIECES + + +class RubanetraTcpActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:tcp_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.TcpActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'sourcePort: \'{source_port}\'', + u'destinationPort: \'{destination_port}\'', + u'sequenceNumber: \'{sequence_number}\'', + u'acknowledgeNumber: \'{acknowledge_number}\'', + u'relativeSequenceNumber: \'{relative_sequence_number}\'', + u'relativeAcknowledgeNumber: \'{relative_acknowledge_number}\'', + u'dataOffset: \'{data_offset}\'', + u'controlBits: \'{control_bits}\'', + u'windowSize: \'{window_size}\'', + u'checksum: \'{checksum}\'', + u'urgentPointer: \'{urgent_pointer}\'', + u'tcpLength: \'{tcp_length}\'', + u'options: \'{options}\'', + u'padding: \'{padding}\'', + u'syn: \'{syn}\'', + u'ack: \'{ack}\'', + u'psh: \'{psh}\'', + u'fin: \'{fin}\'', + u'rst: \'{rst}\'', + u'urg: \'{urg}\'', + u'direction: \'{direction}\'', + u'clientState: \'{client_state}\'', + u'serverState: \'{server_state}\'', + u'JNetPcapTcpString: \'{jnetpcap_tcp}\'', + u'sourceAddress: \'{source_address}\'', + u'destinationAddress: \'{destination_address}\'', + u'sourceSocketAddress: \'{source_socket_address}\'', + u'destinationSocketAddress: \'{destination_socket_address}\''] + + +class RubanetraTelnetActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:telnet_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.TelnetActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'subType: \'{sub_type}\'', + u'command: \'{command}\'', + u'option: \'{option}\'', + u'ansiMode: \'{ansi_mode}\'', + u'arguments: \'{arguments}\'', + u'text: \'{text}\'', + u'title: \'{title}\''] + + +class RubanetraTlsActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:tls_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.TlsActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'clientToServerTraffic: \'{client_to_server_traffic}\'', + u'serverToClientTraffic: \'{server_to_client_traffic}\''] + + +class RubanetraUdpActivityFormatter(RubanetraBaseActivityFormatter): + DATA_TYPE = 'java:rubanetra:udp_activity' + SOURCE_LONG = 'at.jku.fim.rubanetra.UdpActivity' + + FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \ + [u'sourcePort: \'{source_port}\'', + u'destinationPort: \'{destination_port}\'', + u'length: \'{length}\'', + u'checksum: \'{checksum}\'', + u'JNetPcapUdpString: \'{jnetpcap_udp}\'', + u'sourceSocketAddress: \'{source_socket_address}\'', + u'destinationSocketAddress: 
\'{destination_socket_address}\'']
+
+
+class RubanetraOpenSSHActivityFormatter(RubanetraBaseActivityFormatter):
+    DATA_TYPE = 'java:rubanetra:open_ssh_activity'
+    SOURCE_LONG = 'at.jku.fim.rubanetra.OpenSSHActivity'
+
+    FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
+        [u'clientToServerTraffic: \'{client_to_server_traffic}\'',
+         u'serverToClientTraffic: \'{server_to_client_traffic}\'']
+
+
+class RubanetraDropboxTlsActivityFormatter(RubanetraBaseActivityFormatter):
+    DATA_TYPE = 'java:rubanetra:dropbox_tls_activity'
+    SOURCE_LONG = 'at.jku.fim.rubanetra.DropboxActivity'
+
+    FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
+        [u'clientAddress: \'{client_address}\'',
+         u'serverAddress: \'{server_address}\'']
+
+
+class RubanetraSpiderOakActivityFormatter(RubanetraBaseActivityFormatter):
+    DATA_TYPE = 'java:rubanetra:spideroak_activity'
+    SOURCE_LONG = 'at.jku.fim.rubanetra.SpiderOakActivity'
+
+    FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
+        [u'clientAddress: \'{client_address}\'',
+         u'serverAddress: \'{server_address}\'']
+
+
+class RubanetraSkypePayloadActivityFormatter(RubanetraBaseActivityFormatter):
+    DATA_TYPE = 'java:rubanetra:skype_payload_activity'
+    SOURCE_LONG = 'at.jku.fim.rubanetra.SkypePayloadActivity'
+
+    FORMAT_STRING_PIECES = RubanetraBaseActivityFormatter.FORMAT_STRING_PIECES + \
+        [u'sourceObjectId: \'{source_object_id}\'',
+         u'destinationObjectId: \'{destination_object_id}\'',
+         u'sourceHost: \'{source_host}\'',
+         u'destinationHost: \'{destination_host}\'']
+
+
+class JavaInstantFormatter(interface.EventFormatter):
+    """ Formatter for a Java Instant """
+
+    DATA_TYPE = 'java:time:Instant'
+    SOURCE_SHORT = 'JAVA'
+    SOURCE_LONG = 'java.time.Instant'
+
+    FORMAT_STRING = (
+        u'epoch_seconds: \'{instant_epoch_seconds}\', nano: \'{instant_nano}\'')
+    FORMAT_STRING_SHORT = u'{instant_epoch_seconds}.{instant_nano}'
diff --git a/plaso/formatters/safari.py b/plaso/formatters/safari.py
new file mode 100644
index 0000000..bad4734
--- /dev/null
+++ b/plaso/formatters/safari.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Formatter for the Safari History events."""
+
+from plaso.formatters import interface
+
+
+class SafariHistoryFormatter(interface.ConditionalEventFormatter):
+  """Formatter for Safari history events."""
+
+  DATA_TYPE = 'safari:history:visit'
+
+  FORMAT_STRING_PIECES = [
+      u'Visited: {url}', u'({title}', u'- {display_title}', u')',
+      u'Visit Count: {visit_count}']
+
+  SOURCE_LONG = 'Safari History'
+  SOURCE_SHORT = 'WEBHIST'
diff --git a/plaso/formatters/selinux.py b/plaso/formatters/selinux.py
new file mode 100644
index 0000000..e25afe8
--- /dev/null
+++ b/plaso/formatters/selinux.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
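
The Rubanetra subclasses above extend the base pieces with `Base.FORMAT_STRING_PIECES + [...]`, which creates a fresh list per subclass. That is the safe idiom: mutating the inherited list in place would leak pieces into the base class and every sibling. A standalone sketch of the difference, with illustrative class and placeholder names:

class Base(object):
    PIECES = [u'type: {activity_type}']

class Extended(Base):
    # New list built at class creation; Base.PIECES is untouched.
    PIECES = Base.PIECES + [u'frame: {frame_number}']

class Careless(Base):
    pass

Careless.PIECES.append(u'oops')  # resolves to Base.PIECES and mutates it
assert Base.PIECES == [u'type: {activity_type}', u'oops']
assert Extended.PIECES == [u'type: {activity_type}', u'frame: {frame_number}']
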
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a selinux formatter in plaso."""
+
+from plaso.formatters import interface
+
+
+class SELinuxFormatter(interface.ConditionalEventFormatter):
+  """Formatter for selinux files."""
+
+  DATA_TYPE = 'selinux:line'
+
+  FORMAT_STRING_SEPARATOR = u''
+
+  FORMAT_STRING_PIECES = [u'[', u'audit_type: {audit_type}',
+                          u', pid: {pid}', u']', u' {body}']
+
+  SOURCE_LONG = 'Audit log File'
+  SOURCE_SHORT = 'LOG'
diff --git a/plaso/formatters/shell_items.py b/plaso/formatters/shell_items.py
new file mode 100644
index 0000000..96ba3eb
--- /dev/null
+++ b/plaso/formatters/shell_items.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Formatter for the shell item events."""
+
+from plaso.formatters import interface
+
+
+class ShellItemFileEntryEventFormatter(interface.ConditionalEventFormatter):
+  """Class that formats file entry shell item events."""
+
+  DATA_TYPE = 'windows:shell_item:file_entry'
+
+  FORMAT_STRING_PIECES = [
+      u'Name: {name}',
+      u'Long name: {long_name}',
+      u'Localized name: {localized_name}',
+      u'NTFS file reference: {file_reference}',
+      u'Origin: {origin}']
+
+  FORMAT_STRING_SHORT_PIECES = [
+      u'Name: {name}',
+      u'NTFS file reference: {file_reference}',
+      u'Origin: {origin}']
+
+  SOURCE_LONG = 'File entry shell item'
+  SOURCE_SHORT = 'FILE'
diff --git a/plaso/formatters/skydrivelog.py b/plaso/formatters/skydrivelog.py
new file mode 100644
index 0000000..627f04f
--- /dev/null
+++ b/plaso/formatters/skydrivelog.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a skydrivelog formatter in plaso.""" + +from plaso.formatters import interface + + +class SkyDriveLogFormatter(interface.ConditionalEventFormatter): + """Formatter for SkyDrive log files events.""" + + DATA_TYPE = 'skydrive:log:line' + + FORMAT_STRING_PIECES = [ + u'[{source_code}]', + u'({log_level})', + u'{text}'] + + FORMAT_STRING_SHORT_PIECES = [u'{text}'] + + SOURCE_LONG = 'SkyDrive Log File' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/skydrivelogerr.py b/plaso/formatters/skydrivelogerr.py new file mode 100644 index 0000000..7c408f5 --- /dev/null +++ b/plaso/formatters/skydrivelogerr.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a skydrivelogerr formatter in plaso.""" + +from plaso.formatters import interface + + +class SkyDriveLogErrorFormatter(interface.ConditionalEventFormatter): + """Formatter for SkyDrive log error files events.""" + + DATA_TYPE = 'skydrive:error:line' + + FORMAT_STRING_PIECES = [ + u'[{module}', + u'{source_code}]', + u'{text}', + u'({detail})'] + + FORMAT_STRING_SHORT_PIECES = [u'{text}'] + + SOURCE_LONG = 'SkyDrive Error Log File' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/skype.py b/plaso/formatters/skype.py new file mode 100644 index 0000000..f0b1553 --- /dev/null +++ b/plaso/formatters/skype.py @@ -0,0 +1,88 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for the Skype Main database events.""" + +from plaso.formatters import interface + + +class SkypeAccountFormatter(interface.ConditionalEventFormatter): + """Formatter for Skype Account information.""" + + DATA_TYPE = 'skype:event:account' + + FORMAT_STRING_PIECES = [u'{username}', u'[{email}]', u'Country: {country}'] + + SOURCE_LONG = 'Skype Account' + SOURCE_SHORT = 'LOG' + + +class SkypeChatFormatter(interface.ConditionalEventFormatter): + """Formatter for Skype chat events.""" + + DATA_TYPE = 'skype:event:chat' + + FORMAT_STRING_PIECES = [ + u'From: {from_account}', + u'To: {to_account}', + u'[{title}]', + u'Message: [{text}]'] + + FORMAT_STRING_SHORT_PIECES = [u'From: {from_account}', u' To: {to_account}'] + + SOURCE_LONG = 'Skype Chat MSG' + SOURCE_SHORT = 'LOG' + + +class SkypeSMSFormatter(interface.ConditionalEventFormatter): + """Formatter for Skype SMS.""" + + DATA_TYPE = 'skype:event:sms' + + FORMAT_STRING_PIECES = [u'To: {number}', u'[{text}]'] + + SOURCE_LONG = 'Skype SMS' + SOURCE_SHORT = 'LOG' + + +class SkypeCallFormatter(interface.ConditionalEventFormatter): + """Formatter for Skype calls.""" + + DATA_TYPE = 'skype:event:call' + + FORMAT_STRING_PIECES = [ + u'From: {src_call}', + u'To: {dst_call}', + u'[{call_type}]'] + + SOURCE_LONG = 'Skype Call' + SOURCE_SHORT = 'LOG' + + +class SkypeTransferFileFormatter(interface.ConditionalEventFormatter): + """Formatter for Skype transfer files""" + + DATA_TYPE = 'skype:event:transferfile' + + FORMAT_STRING_PIECES = [ + u'Source: {source}', + u'Destination: {destination}', + u'File: {transferred_filename}', + u'[{action_type}]'] + + SOURCE_LONG = 'Skype Transfer Files' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/symantec.py b/plaso/formatters/symantec.py new file mode 100644 index 0000000..1d24e38 --- /dev/null +++ b/plaso/formatters/symantec.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains a formatter for Symantec logs.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class SymantecFormatter(interface.ConditionalEventFormatter): + """Define the formatting for Symantec events.""" + + DATA_TYPE = 'av:symantec:scanlog' + + EVENT_NAMES = { + '1': 'GL_EVENT_IS_ALERT', + '2': 'GL_EVENT_SCAN_STOP', + '3': 'GL_EVENT_SCAN_START', + '4': 'GL_EVENT_PATTERN_UPDATE', + '5': 'GL_EVENT_INFECTION', + '6': 'GL_EVENT_FILE_NOT_OPEN', + '7': 'GL_EVENT_LOAD_PATTERN', + '8': 'GL_STD_MESSAGE_INFO', + '9': 'GL_STD_MESSAGE_ERROR', + '10': 'GL_EVENT_CHECKSUM', + '11': 'GL_EVENT_TRAP', + '12': 'GL_EVENT_CONFIG_CHANGE', + '13': 'GL_EVENT_SHUTDOWN', + '14': 'GL_EVENT_STARTUP', + '16': 'GL_EVENT_PATTERN_DOWNLOAD', + '17': 'GL_EVENT_TOO_MANY_VIRUSES', + '18': 'GL_EVENT_FWD_TO_QSERVER', + '19': 'GL_EVENT_SCANDLVR', + '20': 'GL_EVENT_BACKUP', + '21': 'GL_EVENT_SCAN_ABORT', + '22': 'GL_EVENT_RTS_LOAD_ERROR', + '23': 'GL_EVENT_RTS_LOAD', + '24': 'GL_EVENT_RTS_UNLOAD', + '25': 'GL_EVENT_REMOVE_CLIENT', + '26': 'GL_EVENT_SCAN_DELAYED', + '27': 'GL_EVENT_SCAN_RESTART', + '28': 'GL_EVENT_ADD_SAVROAMCLIENT_TOSERVER', + '29': 'GL_EVENT_REMOVE_SAVROAMCLIENT_FROMSERVER', + '30': 'GL_EVENT_LICENSE_WARNING', + '31': 'GL_EVENT_LICENSE_ERROR', + '32': 'GL_EVENT_LICENSE_GRACE', + '33': 'GL_EVENT_UNAUTHORIZED_COMM', + '34': 'GL_EVENT_LOG_FWD_THRD_ERR', + '35': 'GL_EVENT_LICENSE_INSTALLED', + '36': 'GL_EVENT_LICENSE_ALLOCATED', + '37': 'GL_EVENT_LICENSE_OK', + '38': 'GL_EVENT_LICENSE_DEALLOCATED', + '39': 'GL_EVENT_BAD_DEFS_ROLLBACK', + '40': 'GL_EVENT_BAD_DEFS_UNPROTECTED', + '41': 'GL_EVENT_SAV_PROVIDER_PARSING_ERROR', + '42': 'GL_EVENT_RTS_ERROR', + '43': 'GL_EVENT_COMPLIANCE_FAIL', + '44': 'GL_EVENT_COMPLIANCE_SUCCESS', + '45': 'GL_EVENT_SECURITY_SYMPROTECT_POLICYVIOLATION', + '46': 'GL_EVENT_ANOMALY_START', + '47': 'GL_EVENT_DETECTION_ACTION_TAKEN', + '48': 'GL_EVENT_REMEDIATION_ACTION_PENDING', + '49': 'GL_EVENT_REMEDIATION_ACTION_FAILED', + '50': 'GL_EVENT_REMEDIATION_ACTION_SUCCESSFUL', + '51': 'GL_EVENT_ANOMALY_FINISH', + '52': 'GL_EVENT_COMMS_LOGIN_FAILED', + '53': 'GL_EVENT_COMMS_LOGIN_SUCCESS', + '54': 'GL_EVENT_COMMS_UNAUTHORIZED_COMM', + '55': 'GL_EVENT_CLIENT_INSTALL_AV', + '56': 'GL_EVENT_CLIENT_INSTALL_FW', + '57': 'GL_EVENT_CLIENT_UNINSTALL', + '58': 'GL_EVENT_CLIENT_UNINSTALL_ROLLBACK', + '59': 'GL_EVENT_COMMS_SERVER_GROUP_ROOT_CERT_ISSUE', + '60': 'GL_EVENT_COMMS_SERVER_CERT_ISSUE', + '61': 'GL_EVENT_COMMS_TRUSTED_ROOT_CHANGE', + '62': 'GL_EVENT_COMMS_SERVER_CERT_STARTUP_FAILED', + '63': 'GL_EVENT_CLIENT_CHECKIN', + '64': 'GL_EVENT_CLIENT_NO_CHECKIN', + '65': 'GL_EVENT_SCAN_SUSPENDED', + '66': 'GL_EVENT_SCAN_RESUMED', + '67': 'GL_EVENT_SCAN_DURATION_INSUFFICIENT', + '68': 'GL_EVENT_CLIENT_MOVE', + '69': 'GL_EVENT_SCAN_FAILED_ENHANCED', + '70': 'GL_EVENT_MAX_event_name', + '71': 'GL_EVENT_HEUR_THREAT_NOW_WHITELISTED', + '72': 'GL_EVENT_INTERESTING_PROCESS_DETECTED_START', + '73': 'GL_EVENT_LOAD_ERROR_COH', + '74': 'GL_EVENT_LOAD_ERROR_SYKNAPPS', + '75': 'GL_EVENT_INTERESTING_PROCESS_DETECTED_FINISH', + '76': 'GL_EVENT_HPP_SCAN_NOT_SUPPORTED_FOR_OS', + '77': 'GL_EVENT_HEUR_THREAT_NOW_KNOWN' + } + CATEGORY_NAMES = { + '1': 'GL_CAT_INFECTION', + '2': 'GL_CAT_SUMMARY', + '3': 'GL_CAT_PATTERN', + '4': 'GL_CAT_SECURITY' + } + ACTION_1_2_NAMES = { + '1': 'Quarantine infected file', + '2': 'Rename infected file', + '3': 'Delete infected file', + '4': 'Leave alone (log only)', + '5': 'Clean virus from 
file', + '6': 'Clean or delete macros' + } + ACTION_0_NAMES = { + '1': 'Quarantined', + '2': 'Renamed', + '3': 'Deleted', + '4': 'Left alone', + '5': 'Cleaned', + '6': ('Cleaned or macros deleted (no longer used as of ' + 'Symantec AntiVirus 9.x)'), + '7': 'Saved file as...', + '8': 'Sent to Intel (AMS)', + '9': 'Moved to backup location', + '10': 'Renamed backup file', + '11': 'Undo action in Quarantine View', + '12': 'Write protected or lack of permissions - Unable to act on file', + '13': 'Backed up file' + } + + # The identifier for the formatter (a regular expression) + FORMAT_STRING_SEPARATOR = u'; ' + FORMAT_STRING_PIECES = [ + u'Event Name: {event_map}', + u'Category Name: {category_map}', + u'Malware Name: {virus}', + u'Malware Path: {file}', + u'Action0: {action0_map}', + u'Action1: {action1_map}', + u'Action2: {action2_map}', + u'Description: {description}', + u'Scan ID: {scanid}', + u'Event Data: {event_data}', + u'Remote Machine: {remote_machine}', + u'Remote IP: {remote_machine_ip}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{file}', + u'{virus}', + u'{action0_map}', + u'{action1_map}', + u'{action2_map}'] + + SOURCE_LONG = 'Symantec AV Log' + SOURCE_SHORT = 'LOG' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + if hasattr(event_object, 'event'): + event_object.event_map = self.EVENT_NAMES.get( + event_object.event, 'Unknown') + if hasattr(event_object, 'cat'): + event_object.category_map = self.CATEGORY_NAMES.get( + event_object.cat, 'Unknown') + if hasattr(event_object, 'action1'): + event_object.action1_map = self.ACTION_1_2_NAMES.get( + event_object.action1, 'Unknown') + if hasattr(event_object, 'action2'): + event_object.action2_map = self.ACTION_1_2_NAMES.get( + event_object.action2, 'Unknown') + if hasattr(event_object, 'action0'): + event_object.action0_map = self.ACTION_0_NAMES.get( + event_object.action0, 'Unknown') + return super(SymantecFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/syslog.py b/plaso/formatters/syslog.py new file mode 100644 index 0000000..e54b8be --- /dev/null +++ b/plaso/formatters/syslog.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
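
SymantecFormatter.GetMessages above resolves each numeric code through a dictionary lookup with an 'Unknown' fallback before the pieces are formatted. A trimmed standalone sketch of that step; FakeScanLogEvent stands in for plaso's EventObject:

EVENT_NAMES = {
    '2': 'GL_EVENT_SCAN_STOP',
    '3': 'GL_EVENT_SCAN_START',
}

class FakeScanLogEvent(object):
    data_type = 'av:symantec:scanlog'
    event = '3'

event_object = FakeScanLogEvent()
if hasattr(event_object, 'event'):
    # Unknown codes degrade gracefully instead of raising.
    event_object.event_map = EVENT_NAMES.get(event_object.event, 'Unknown')
assert event_object.event_map == 'GL_EVENT_SCAN_START'
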
+"""This file contains a syslog formatter in plaso.""" + +from plaso.formatters import interface + + +class SyslogLineFormatter(interface.ConditionalEventFormatter): + """Formatter for syslog files.""" + + DATA_TYPE = 'syslog:line' + + FORMAT_STRING_SEPARATOR = u'' + + FORMAT_STRING_PIECES = [u'[', u'{reporter}', u', pid: {pid}', u'] {body}'] + + SOURCE_LONG = 'Log File' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/task_scheduler.py b/plaso/formatters/task_scheduler.py new file mode 100644 index 0000000..b614243 --- /dev/null +++ b/plaso/formatters/task_scheduler.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Task Scheduler events.""" + +from plaso.formatters import interface + + +class TaskCacheEventFormatter(interface.ConditionalEventFormatter): + """Formatter for a generic Task Cache event.""" + + DATA_TYPE = 'task_scheduler:task_cache:entry' + + FORMAT_STRING_PIECES = [ + u'Task: {task_name}', + u'[Identifier: {task_identifier}]'] + + FORMAT_STRING_SHORT_PIECES = [ + u'Task: {task_name}'] + + SOURCE_LONG = 'Task Cache' + SOURCE_SHORT = 'REG' diff --git a/plaso/formatters/text.py b/plaso/formatters/text.py new file mode 100644 index 0000000..109d8ca --- /dev/null +++ b/plaso/formatters/text.py @@ -0,0 +1,30 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for text file-based events.""" + +from plaso.formatters import interface + + +class TextEventFormatter(interface.EventFormatter): + """Text event formatter.""" + + DATA_TYPE = u'text:entry' + FORMAT_STRING = u'{text}' + + SOURCE_SHORT = u'LOG' + SOURCE_LONG = u'Text File' diff --git a/plaso/formatters/utmp.py b/plaso/formatters/utmp.py new file mode 100644 index 0000000..c3b568f --- /dev/null +++ b/plaso/formatters/utmp.py @@ -0,0 +1,41 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the UTMP binary files.""" + +from plaso.formatters import interface + + +class UtmpSessionFormatter(interface.ConditionalEventFormatter): + """Formatter for UTMP session.""" + + DATA_TYPE = 'linux:utmp:event' + + FORMAT_STRING_PIECES = [ + u'User: {user}', + u'Computer Name: {computer_name}', + u'Terminal: {terminal}', + u'PID: {pid}', + u'Terminal_ID: {terminal_id}', + u'Status: {status}', + u'IP Address: {ip_address}', + u'Exit: {exit}'] + + FORMAT_STRING_SHORT_PIECES = [u'User: {user}'] + + SOURCE_LONG = 'UTMP session' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/utmpx.py b/plaso/formatters/utmpx.py new file mode 100644 index 0000000..1e8ade3 --- /dev/null +++ b/plaso/formatters/utmpx.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for the UTMPX binary files.""" + +from plaso.formatters import interface + +class UtmpxSessionFormatter(interface.ConditionalEventFormatter): + """Formatter for UTMPX session.""" + + DATA_TYPE = 'mac:utmpx:event' + + FORMAT_STRING_PIECES = [ + u'User: {user}', + u'Status: {status}', + u'Computer Name: {computer_name}', + u'Terminal: {terminal}'] + + FORMAT_STRING_SHORT_PIECES = [u'User: {user}'] + + SOURCE_LONG = 'UTMPX session' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/windows.py b/plaso/formatters/windows.py new file mode 100644 index 0000000..fdd5adb --- /dev/null +++ b/plaso/formatters/windows.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for the Windows events.""" + +from plaso.formatters import interface + + +class WindowsVolumeCreationEventFormatter(interface.ConditionalEventFormatter): + """Class that formats Windows volume creation events.""" + + DATA_TYPE = 'windows:volume:creation' + + FORMAT_STRING_PIECES = [ + u'{device_path}', + u'Serial number: 0x{serial_number:08X}', + u'Origin: {origin}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{device_path}', + u'Origin: {origin}'] + + SOURCE_LONG = 'System' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/winevt.py b/plaso/formatters/winevt.py new file mode 100644 index 0000000..5eef4ee --- /dev/null +++ b/plaso/formatters/winevt.py @@ -0,0 +1,113 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Windows EventLog (EVT) files.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class WinEvtFormatter(interface.ConditionalEventFormatter): + """Define the formatting for Windows EventLog (EVT) record.""" + + DATA_TYPE = 'windows:evt:record' + + # TODO: add string representation of facility. + FORMAT_STRING_PIECES = [ + u'[{event_identifier} /', + u'0x{event_identifier:04x}]', + u'Severity: {severity_string}', + u'Record Number: {record_number}', + u'Event Type: {event_type_string}', + u'Event Category: {event_category}', + u'Source Name: {source_name}', + u'Computer Name: {computer_name}', + u'Strings: {strings}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'[{event_identifier} /', + u'0x{event_identifier:04x}]', + u'Strings: {strings}'] + + SOURCE_LONG = 'WinEVT' + SOURCE_SHORT = 'EVT' + + # Mapping of the numeric event types to a descriptive string. + _EVENT_TYPES = [ + u'Error event', + u'Warning event', + u'Information event', + u'Success Audit event', + u'Failure Audit event'] + + _SEVERITY = [ + u'Success', + u'Informational', + u'Warning', + u'Error'] + + def GetEventTypeString(self, event_type): + """Retrieves a string representation of the event type. + + Args: + event_type: The numeric event type. + + Returns: + An Unicode string containing a description of the event type. + """ + if event_type >= 0 and event_type < len(self._EVENT_TYPES): + return self._EVENT_TYPES[event_type] + return u'Unknown {0:d}'.format(event_type) + + def GetSeverityString(self, severity): + """Retrieves a string representation of the severity. + + Args: + severity: The numeric severity. + + Returns: + An Unicode string containing a description of the event type. + """ + if severity >= 0 and severity < len(self._SEVERITY): + return self._SEVERITY[severity] + return u'Unknown {0:d}'.format(severity) + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. 
+ + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + # Update event object with the event type string. + event_object.event_type_string = self.GetEventTypeString( + event_object.event_type) + + # TODO: add string representation of facility. + + # Update event object with the severity string. + event_object.severity_string = self.GetSeverityString(event_object.severity) + + return super(WinEvtFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/winevtx.py b/plaso/formatters/winevtx.py new file mode 100644 index 0000000..e667347 --- /dev/null +++ b/plaso/formatters/winevtx.py @@ -0,0 +1,41 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatters for Windows XML EventLog (EVTX) related events.""" +from plaso.formatters import interface + + +class WinEvtxFormatter(interface.ConditionalEventFormatter): + """Formatter for a Windows XML EventLog (EVTX) record.""" + DATA_TYPE = 'windows:evtx:record' + + FORMAT_STRING_PIECES = [ + u'[{event_identifier} /', + u'0x{event_identifier:04x}]', + u'Record Number: {record_number}', + u'Event Level: {event_level}', + u'Source Name: {source_name}', + u'Computer Name: {computer_name}', + u'Strings: {strings}', + u'XML string: {xml_strings}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'[{event_identifier} /', + u'0x{event_identifier:04x}]', + u'Strings: {strings}'] + + SOURCE_LONG = 'WinEVTX' + SOURCE_SHORT = 'EVT' diff --git a/plaso/formatters/winfirewall.py b/plaso/formatters/winfirewall.py new file mode 100644 index 0000000..875721a --- /dev/null +++ b/plaso/formatters/winfirewall.py @@ -0,0 +1,63 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
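
WinEvtFormatter above maps numeric event type and severity values onto descriptive strings with a bounds-checked list index and an 'Unknown N' fallback. A standalone sketch of the same logic as GetEventTypeString:

_EVENT_TYPES = [
    u'Error event',
    u'Warning event',
    u'Information event',
    u'Success Audit event',
    u'Failure Audit event']

def event_type_string(event_type):
    # In-range values index the table; anything else reports the raw number.
    if 0 <= event_type < len(_EVENT_TYPES):
        return _EVENT_TYPES[event_type]
    return u'Unknown {0:d}'.format(event_type)

assert event_type_string(1) == u'Warning event'
assert event_type_string(9) == u'Unknown 9'
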
+"""Formatter for Windows firewall log files.""" + +from plaso.formatters import interface + + +class WinFirewallFormatter(interface.ConditionalEventFormatter): + """A formatter for Windows firewall log entries.""" + + DATA_TYPE = 'windows:firewall:log_entry' + + # TODO: Add more "elegant" formatting, as in transform ICMP code/type into + # a more human readable format as well as translating the additional info + # column (meaning may depend on action field). + FORMAT_STRING_PIECES = [ + u'{action}', + u'[', + u'{protocol}', + u'{path}', + u']', + u'From: {source_ip}', + u':{source_port}', + u'>', + u'{dest_ip}', + u':{dest_port}', + u'Size (bytes): {size}', + u'Flags [{flags}]', + u'TCP Seq Number: {tcp_seq}', + u'TCP ACK Number: {tcp_ack}', + u'TCP Window Size (bytes): {tcp_win}', + u'ICMP type: {icmp_type}', + u'ICMP code: {icmp_code}', + u'Additional info: {info}', + ] + + FORMAT_STRING_SHORT_PIECES = [ + u'{action}', + u'[{protocol}]', + u'{source_ip}', + u': {source_port}', + u'>', + u'{dest_ip}', + u': {dest_port}', + ] + + SOURCE_LONG = 'Windows Firewall Log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/winjob.py b/plaso/formatters/winjob.py new file mode 100644 index 0000000..8a8a7a9 --- /dev/null +++ b/plaso/formatters/winjob.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Windows Scheduled Task job events.""" + +from plaso.formatters import interface + + +class WinJobFormatter(interface.ConditionalEventFormatter): + """Formatter for a Java Cache IDX download item.""" + + DATA_TYPE = 'windows:tasks:job' + + FORMAT_STRING_PIECES = [ + u'Application: {application}', + u'{parameter}', + u'Scheduled by: {username}', + u'Working Directory: {working_dir}', + u'Run Iteration: {trigger}'] + + SOURCE_LONG = 'Windows Scheduled Task Job' + SOURCE_SHORT = 'JOB' diff --git a/plaso/formatters/winlnk.py b/plaso/formatters/winlnk.py new file mode 100644 index 0000000..c4d0529 --- /dev/null +++ b/plaso/formatters/winlnk.py @@ -0,0 +1,101 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for Windows Shortcut (LNK) files.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class WinLnkLinkFormatter(interface.ConditionalEventFormatter): + """Formatter for a Windows Shortcut (LNK) link event.""" + + DATA_TYPE = 'windows:lnk:link' + + FORMAT_STRING_PIECES = [ + u'[{description}]', + u'File size: {file_size}', + u'File attribute flags: 0x{file_attribute_flags:08x}', + u'Drive type: {drive_type}', + u'Drive serial number: 0x{drive_serial_number:08x}', + u'Volume label: {volume_label}', + u'Local path: {local_path}', + u'Network path: {network_path}', + u'cmd arguments: {command_line_arguments}', + u'env location: {env_var_location}', + u'Relative path: {relative_path}', + u'Working dir: {working_directory}', + u'Icon location: {icon_location}', + u'Link target: [{link_target}]'] + + FORMAT_STRING_SHORT_PIECES = [ + u'[{description}]', + u'{linked_path}', + u'{command_line_arguments}'] + + SOURCE_LONG = 'Windows Shortcut' + SOURCE_SHORT = 'LNK' + + def _GetLinkedPath(self, event_object): + """Determines the linked path. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A string containing the linked path. + """ + if hasattr(event_object, 'local_path'): + return event_object.local_path + + if hasattr(event_object, 'network_path'): + return event_object.network_path + + if hasattr(event_object, 'relative_path'): + paths = [] + if hasattr(event_object, 'working_directory'): + paths.append(event_object.working_directory) + paths.append(event_object.relative_path) + + return u'\\'.join(paths) + + return 'Unknown' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + # Update event object with a description if necessary. + if not hasattr(event_object, 'description'): + event_object.description = u'Empty description' + + # Update event object with the linked path. + event_object.linked_path = self._GetLinkedPath(event_object) + + return super(WinLnkLinkFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/winprefetch.py b/plaso/formatters/winprefetch.py new file mode 100644 index 0000000..b0d12a9 --- /dev/null +++ b/plaso/formatters/winprefetch.py @@ -0,0 +1,76 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Formatter for the Windows Prefetch events.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class WinPrefetchExecutionFormatter(interface.ConditionalEventFormatter): + """Class that formats Windows Prefetch execution events.""" + + DATA_TYPE = 'windows:prefetch:execution' + + FORMAT_STRING_PIECES = [ + u'Prefetch', + u'[{executable}] was executed -', + u'run count {run_count}', + u'path: {path}', + u'hash: 0x{prefetch_hash:08X}', + u'{volumes_string}'] + + FORMAT_STRING_SHORT_PIECES = [ + u'{executable} was run', + u'{run_count} time(s)'] + + SOURCE_LONG = 'WinPrefetch' + SOURCE_SHORT = 'LOG' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (instance of EventObject) containing + the event specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + + Raises: + WrongFormatter: when the data type of the formatter does not match + that of the event object. + """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter( + u'Invalid event object - unsupported data type: {0:s}'.format( + event_object.data_type)) + + volumes_strings = [] + for volume_index in range(0, event_object.number_of_volumes): + volumes_strings.append(( + u'volume: {0:d} [serial number: 0x{1:08X}, device path: ' + u'{2:s}]').format( + volume_index + 1, + event_object.volume_serial_numbers[volume_index], + event_object.volume_device_paths[volume_index])) + + if volumes_strings: + event_object.volumes_string = u', '.join(volumes_strings) + + return super(WinPrefetchExecutionFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/winreg.py b/plaso/formatters/winreg.py new file mode 100644 index 0000000..148d6b5 --- /dev/null +++ b/plaso/formatters/winreg.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for Windows NT Registry (REGF) files.""" + +from plaso.lib import errors +from plaso.formatters import interface + + +class WinRegistryGenericFormatter(interface.EventFormatter): + """Formatter for a generic Windows Registry key or value.""" + + DATA_TYPE = 'windows:registry:key_value' + + FORMAT_STRING = u'[{keyname}] {text}' + FORMAT_STRING_ALTERNATIVE = u'{text}' + + SOURCE_LONG = 'Registry Key' + SOURCE_SHORT = 'REG' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from an event object. + + Args: + event_object: The event object (EventObject) containing the event + specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. 
+ """ + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + regvalue = getattr(event_object, 'regvalue', {}) + + string_parts = [] + for key, value in sorted(regvalue.items()): + string_parts.append(u'{0:s}: {1!s}'.format(key, value)) + + text = u' '.join(string_parts) + + event_object.text = text + if hasattr(event_object, 'keyname'): + self.format_string = self.FORMAT_STRING + else: + self.format_string = self.FORMAT_STRING_ALTERNATIVE + + return super(WinRegistryGenericFormatter, self).GetMessages(event_object) + + def GetSources(self, event_object): + """Returns a list of source short and long messages for the event.""" + if self.DATA_TYPE != event_object.data_type: + raise errors.WrongFormatter(u'Unsupported data type: {0:s}.'.format( + event_object.data_type)) + + self.source_string = getattr(event_object, 'source_long', None) + + if not self.source_string: + registry_type = getattr(event_object, 'registry_type', 'UNKNOWN') + self.source_string = u'{0:s} key'.format(registry_type) + + if hasattr(event_object, 'source_append'): + self.source_string += u' {0:s}'.format(event_object.source_append) + + return super(WinRegistryGenericFormatter, self).GetSources(event_object) diff --git a/plaso/formatters/winregservice.py b/plaso/formatters/winregservice.py new file mode 100644 index 0000000..3b2e2c8 --- /dev/null +++ b/plaso/formatters/winregservice.py @@ -0,0 +1,58 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Formatter for service entries derived from Windows Registry files.""" + +from plaso.formatters import winreg +from plaso.winnt import human_readable_service_enums + + +class WinRegistryServiceFormatter(winreg.WinRegistryGenericFormatter): + """Formatter for a Windows service event extracted from the Registry.""" + + DATA_TYPE = 'windows:registry:service' + + def GetMessages(self, event_object): + """Returns a list of messages extracted from the event object. + + This formatter will make the values of certain service parameters more + readable by humans. + + Args: + event_object: The event object (an instance of EventObject) containing + the event specific data. + + Returns: + A list that contains both the longer and shorter version of the message + string. + """ + regvalue = getattr(event_object, 'regvalue', {}) + # Loop over all the registry value names in the service key. + for service_value_name in regvalue.keys(): + # A temporary variable so we can refer to this long name more easily. + service_enums = human_readable_service_enums.SERVICE_ENUMS + # Check if we need to can make the value more human readable. + if service_value_name in service_enums.keys(): + service_enum = service_enums[service_value_name] + # Find the human readable version of the name and fall back to the + # raw value if it's not found. 
+ human_readable_value = service_enum.get( + regvalue[service_value_name], + regvalue[service_value_name]) + regvalue[service_value_name] = human_readable_value + + return super(WinRegistryServiceFormatter, self).GetMessages(event_object) diff --git a/plaso/formatters/xchatlog.py b/plaso/formatters/xchatlog.py new file mode 100644 index 0000000..95ef24d --- /dev/null +++ b/plaso/formatters/xchatlog.py @@ -0,0 +1,31 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a xchatlog formatter in plaso.""" + +from plaso.formatters import interface + + +class XChatLogFormatter(interface.ConditionalEventFormatter): + """Formatter for XChat log files.""" + + DATA_TYPE = 'xchat:log:line' + + FORMAT_STRING_PIECES = [u'[nickname: {nickname}]', u'{text}'] + + SOURCE_LONG = 'XChat Log File' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/xchatscrollback.py b/plaso/formatters/xchatscrollback.py new file mode 100644 index 0000000..e6188dc --- /dev/null +++ b/plaso/formatters/xchatscrollback.py @@ -0,0 +1,33 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a xchatscrollback formatter in plaso.""" + +from plaso.formatters import interface + + +class XChatScrollbackFormatter(interface.ConditionalEventFormatter): + """Formatter for XChat scrollback files.""" + + DATA_TYPE = 'xchat:scrollback:line' + + FORMAT_STRING_SEPARATOR = u'' + + FORMAT_STRING_PIECES = [u'[', u'nickname: {nickname}', u']', u' {text}'] + + SOURCE_LONG = 'XChat Scrollback File' + SOURCE_SHORT = 'LOG' diff --git a/plaso/formatters/zeitgeist.py b/plaso/formatters/zeitgeist.py new file mode 100644 index 0000000..9dca044 --- /dev/null +++ b/plaso/formatters/zeitgeist.py @@ -0,0 +1,31 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a formatter for Zeitgeist.""" + +from plaso.formatters import interface + + +class ZeitgeistEventFormatter(interface.EventFormatter): + """The event formatter for Zeitgeist event.""" + + DATA_TYPE = 'zeitgeist:activity' + + FORMAT_STRING = u'{subject_uri}' + + SOURCE_LONG = 'Zeitgeist activity log' + SOURCE_SHORT = 'LOG' diff --git a/plaso/frontend/__init__.py b/plaso/frontend/__init__.py new file mode 100755 index 0000000..1f5c4b3 --- /dev/null +++ b/plaso/frontend/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/frontend/frontend.py b/plaso/frontend/frontend.py new file mode 100755 index 0000000..80098a2 --- /dev/null +++ b/plaso/frontend/frontend.py @@ -0,0 +1,1693 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
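+
+# Note: the front-end classes in this module are driven through small
+# input/output abstractions instead of reading stdin and writing stdout
+# directly. A minimal sketch of how a front-end is wired up, using the
+# classes defined below:
+#
+#   input_reader = StdinFrontendInputReader()
+#   output_writer = StdoutFrontendOutputWriter()
+#   frontend = ExtractionFrontend(input_reader, output_writer)
+#   frontend.ParseOptions(options)
+#   frontend.ScanSource(options)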
+"""The common front-end functionality.""" + +import abc +import locale +import logging +import os +import pdb +import sys +import traceback + +from dfvfs.helpers import source_scanner +from dfvfs.lib import definitions as dfvfs_definitions +from dfvfs.lib import errors as dfvfs_errors +from dfvfs.resolver import context +from dfvfs.volume import tsk_volume_system +from dfvfs.volume import vshadow_volume_system + +import plaso +from plaso import parsers # pylint: disable=unused-import +from plaso.engine import single_process +from plaso.engine import utils as engine_utils +from plaso.engine import worker +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import pfilter +from plaso.lib import storage +from plaso.lib import timelib +from plaso.multi_processing import multi_process +from plaso.parsers import manager as parsers_manager + +import pytz + + +class FrontendInputReader(object): + """Class that implements the input reader interface for the engine.""" + + @abc.abstractmethod + def Read(self): + """Reads a string from the input. + + Returns: + A string containing the input. + """ + + +class FrontendOutputWriter(object): + """Class that implements the output writer interface for the engine.""" + + @abc.abstractmethod + def Write(self, string): + """Writes a string to the output. + + Args: + string: A string containing the output. + """ + + +class StdinFrontendInputReader(object): + """Class that implements a stdin input reader.""" + + def Read(self): + """Reads a string from the input. + + Returns: + A string containing the input. + """ + return sys.stdin.readline() + + +class StdoutFrontendOutputWriter(object): + """Class that implements a stdout output writer.""" + + ENCODING = u'utf-8' + + def Write(self, string): + """Writes a string to the output. + + Args: + string: A string containing the output. + """ + try: + sys.stdout.write(string.encode(self.ENCODING)) + except UnicodeEncodeError: + logging.error( + u'Unable to properly write output, line will be partially ' + u'written out.') + sys.stdout.write(u'LINE ERROR') + sys.stdout.write(string.encode(self.ENCODING, 'ignore')) + + +class Frontend(object): + """Class that implements a front-end.""" + + # The maximum length of the line in number of characters. + _LINE_LENGTH = 80 + + def __init__(self, input_reader, output_writer): + """Initializes the front-end object. + + Args: + input_reader: the input reader (instance of FrontendInputReader). + The default is None which indicates to use the stdin + input reader. + output_writer: the output writer (instance of FrontendOutputWriter). + The default is None which indicates to use the stdout + output writer. + """ + super(Frontend, self).__init__() + self._input_reader = input_reader + self._output_writer = output_writer + + # TODO: add preferred_encoding support of the output writer. + self.preferred_encoding = locale.getpreferredencoding().lower() + + def PrintColumnValue(self, name, description, column_length=25): + """Prints a value with a name and description aligned to the column length. + + Args: + name: The name. + description: The description. + column_length: Optional column length. The default is 25. + """ + line_length = self._LINE_LENGTH - column_length - 3 + + # The format string of the first line of the column value. + primary_format_string = u'{{0:>{0:d}s}} : {{1:s}}\n'.format(column_length) + + # The format string of successive lines of the column value. 
+    secondary_format_string = u'{{0:<{0:d}s}}{{1:s}}\n'.format(
+        column_length + 3)
+
+    if len(description) < line_length:
+      self._output_writer.Write(
+          primary_format_string.format(name, description))
+      return
+
+    # Split the description in words.
+    words = description.split()
+
+    current = 0
+
+    lines = []
+    word_buffer = []
+    for word in words:
+      current += len(word) + 1
+      if current >= line_length:
+        current = len(word)
+        lines.append(u' '.join(word_buffer))
+        word_buffer = [word]
+      else:
+        word_buffer.append(word)
+    lines.append(u' '.join(word_buffer))
+
+    # Print the column value on multiple lines.
+    self._output_writer.Write(primary_format_string.format(name, lines[0]))
+    for line in lines[1:]:
+      self._output_writer.Write(secondary_format_string.format(u'', line))
+
+  def PrintHeader(self, text, character='*'):
+    """Prints the header as a line with centered text.
+
+    Args:
+      text: The header text.
+      character: Optional header line character. The default is '*'.
+    """
+    self._output_writer.Write(u'\n')
+
+    format_string = u'{{0:{0:s}^{1:d}}}\n'.format(character, self._LINE_LENGTH)
+    header_string = format_string.format(u' {0:s} '.format(text))
+    self._output_writer.Write(header_string)
+
+  def PrintSeparatorLine(self):
+    """Prints a separator line."""
+    self._output_writer.Write(u'{0:s}\n'.format(u'-' * self._LINE_LENGTH))
+
+
+class StorageMediaFrontend(Frontend):
+  """Class that implements a front-end with storage media support."""
+
+  # For context see: http://en.wikipedia.org/wiki/Byte
+  _UNITS_1000 = ['B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
+  _UNITS_1024 = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
+
+  def __init__(self, input_reader, output_writer):
+    """Initializes the front-end object.
+
+    Args:
+      input_reader: the input reader (instance of FrontendInputReader).
+                    The default is None which indicates to use the stdin
+                    input reader.
+      output_writer: the output writer (instance of FrontendOutputWriter).
+                     The default is None which indicates to use the stdout
+                     output writer.
+    """
+    super(StorageMediaFrontend, self).__init__(input_reader, output_writer)
+    self._partition_offset = None
+    self._process_vss = True
+    self._resolver_context = context.Context()
+    self._scan_context = source_scanner.SourceScannerContext()
+    self._source_path = None
+    self._source_scanner = source_scanner.SourceScanner()
+    self._vss_stores = None
+
+  def _GetHumanReadableSize(self, size):
+    """Retrieves a human readable string of the size.
+
+    Args:
+      size: The size in bytes.
+
+    Returns:
+      A human readable string of the size.
+    """
+    magnitude_1000 = 0
+    size_1000 = float(size)
+    while size_1000 >= 1000:
+      size_1000 /= 1000
+      magnitude_1000 += 1
+
+    magnitude_1024 = 0
+    size_1024 = float(size)
+    while size_1024 >= 1024:
+      size_1024 /= 1024
+      magnitude_1024 += 1
+
+    size_string_1000 = None
+    if magnitude_1000 > 0 and magnitude_1000 <= 8:
+      size_string_1000 = u'{0:.1f}{1:s}'.format(
+          size_1000, self._UNITS_1000[magnitude_1000])
+
+    size_string_1024 = None
+    if magnitude_1024 > 0 and magnitude_1024 <= 8:
+      size_string_1024 = u'{0:.1f}{1:s}'.format(
+          size_1024, self._UNITS_1024[magnitude_1024])
+
+    if not size_string_1000 or not size_string_1024:
+      return u'{0:d} B'.format(size)
+
+    return u'{0:s} / {1:s} ({2:d} B)'.format(
+        size_string_1024, size_string_1000, size)
+
+  def _GetPartionIdentifierFromUser(self, volume_system, volume_identifiers):
+    """Asks the user to provide the partitioned volume identifier.
+ + Args: + volume_system: The volume system (instance of dfvfs.TSKVolumeSystem). + volume_identifiers: List of allowed volume identifiers. + + Raises: + FileSystemScannerError: if the source cannot be processed. + """ + self._output_writer.Write( + u'The following partitions were found:\n' + u'Identifier\tOffset (in bytes)\tSize (in bytes)\n') + + for volume_identifier in volume_identifiers: + volume = volume_system.GetVolumeByIdentifier(volume_identifier) + if not volume: + raise errors.FileSystemScannerError( + u'Volume missing for identifier: {0:s}.'.format(volume_identifier)) + + volume_extent = volume.extents[0] + self._output_writer.Write( + u'{0:s}\t\t{1:d} (0x{1:08x})\t{2:s}\n'.format( + volume.identifier, volume_extent.offset, + self._GetHumanReadableSize(volume_extent.size))) + + self._output_writer.Write(u'\n') + + while True: + self._output_writer.Write( + u'Please specify the identifier of the partition that should ' + u'be processed:\nNote that you can abort with Ctrl^C.\n') + + selected_volume_identifier = self._input_reader.Read() + selected_volume_identifier = selected_volume_identifier.strip() + + if selected_volume_identifier in volume_identifiers: + break + + self._output_writer.Write( + u'\n' + u'Unsupported partition identifier, please try again or abort ' + u'with Ctrl^C.\n' + u'\n') + + return selected_volume_identifier + + def _GetVolumeTSKPartition( + self, scan_context, partition_number=None, partition_offset=None): + """Determines the volume path specification. + + Args: + scan_context: the scan context (instance of dfvfs.ScanContext). + partition_number: Optional preferred partition number. The default is + None. + partition_offset: Optional preferred partition byte offset. The default + is None. + + Returns: + The volume scan node (instance of dfvfs.SourceScanNode) or None + if no supported partition was found. + + Raises: + SourceScannerError: if the format of or within the source + is not supported or the the scan context + is invalid. + RuntimeError: if the volume for a specific identifier cannot be + retrieved. + """ + if (not scan_context or not scan_context.last_scan_node or + not scan_context.last_scan_node.path_spec): + raise errors.SourceScannerError(u'Invalid scan context.') + + volume_system = tsk_volume_system.TSKVolumeSystem() + volume_system.Open(scan_context.last_scan_node.path_spec) + + volume_identifiers = self._source_scanner.GetVolumeIdentifiers( + volume_system) + if not volume_identifiers: + logging.info(u'No supported partitions found.') + return + + if partition_number is not None and partition_number > 0: + # Plaso uses partition numbers starting with 1 while dfvfs expects + # the volume index to start with 0. 
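+      # For example, a user supplied partition number of 2 selects dfvfs
+      # volume index 1, which corresponds to location u'/p2' in the TSK
+      # volume system.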
+ volume = volume_system.GetVolumeByIndex(partition_number - 1) + if volume: + volume_location = u'/{0:s}'.format(volume.identifier) + volume_scan_node = scan_context.last_scan_node.GetSubNodeByLocation( + volume_location) + if not volume_scan_node: + raise RuntimeError( + u'Unable to retrieve volume scan node by location: {0:s}'.format( + volume_location)) + return volume_scan_node + + logging.warning(u'No such partition: {0:d}.'.format(partition_number)) + + if partition_offset is not None: + for volume in volume_system.volumes: + volume_extent = volume.extents[0] + if volume_extent.offset == partition_offset: + volume_location = u'/{0:s}'.format(volume.identifier) + volume_scan_node = scan_context.last_scan_node.GetSubNodeByLocation( + volume_location) + if not volume_scan_node: + raise RuntimeError(( + u'Unable to retrieve volume scan node by location: ' + u'{0:s}').format(volume_location)) + return volume_scan_node + + logging.warning( + u'No such partition with offset: {0:d} (0x{0:08x}).'.format( + partition_offset)) + + if len(volume_identifiers) == 1: + volume_location = u'/{0:s}'.format(volume_identifiers[0]) + + else: + try: + selected_volume_identifier = self._GetPartionIdentifierFromUser( + volume_system, volume_identifiers) + except KeyboardInterrupt: + raise errors.UserAbort(u'File system scan aborted.') + + volume = volume_system.GetVolumeByIdentifier(selected_volume_identifier) + if not volume: + raise RuntimeError( + u'Unable to retrieve volume by identifier: {0:s}'.format( + selected_volume_identifier)) + + volume_location = u'/{0:s}'.format(selected_volume_identifier) + + volume_scan_node = scan_context.last_scan_node.GetSubNodeByLocation( + volume_location) + if not volume_scan_node: + raise RuntimeError( + u'Unable to retrieve volume scan node by location: {0:s}'.format( + volume_location)) + return volume_scan_node + + def _GetVolumeVssStoreIdentifiers(self, scan_context, vss_stores=None): + """Determines the VSS store identifiers. + + Args: + scan_context: the scan context (instance of dfvfs.ScanContext). + vss_stores: Optional list of preferred VSS stored identifiers. The + default is None. + + Raises: + SourceScannerError: if the format of or within the source + is not supported or the the scan context + is invalid. + """ + if (not scan_context or not scan_context.last_scan_node or + not scan_context.last_scan_node.path_spec): + raise errors.SourceScannerError(u'Invalid scan context.') + + volume_system = vshadow_volume_system.VShadowVolumeSystem() + volume_system.Open(scan_context.last_scan_node.path_spec) + + volume_identifiers = self._source_scanner.GetVolumeIdentifiers( + volume_system) + if not volume_identifiers: + return + + try: + self._vss_stores = self._GetVssStoreIdentifiersFromUser( + volume_system, volume_identifiers, vss_stores=vss_stores) + except KeyboardInterrupt: + raise errors.UserAbort(u'File system scan aborted.') + + return + + def _GetVssStoreIdentifiersFromUser( + self, volume_system, volume_identifiers, vss_stores=None): + """Asks the user to provide the VSS store identifiers. + + Args: + volume_system: The volume system (instance of dfvfs.VShadowVolumeSystem). + volume_identifiers: List of allowed volume identifiers. + vss_stores: Optional list of preferred VSS stored identifiers. The + default is None. + + Returns: + The list of selected VSS store identifiers or None. + + Raises: + SourceScannerError: if the source cannot be processed. 
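+
+    Example:
+      For a volume system exposing stores vss1 through vss4, entering
+      u'1,3..4' at the prompt selects the identifiers [1, 3, 4]; an empty
+      input selects no stores at all.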
+ """ + normalized_volume_identifiers = [] + for volume_identifier in volume_identifiers: + volume = volume_system.GetVolumeByIdentifier(volume_identifier) + if not volume: + raise errors.SourceScannerError( + u'Volume missing for identifier: {0:s}.'.format(volume_identifier)) + + try: + volume_identifier = int(volume.identifier[3:], 10) + normalized_volume_identifiers.append(volume_identifier) + except ValueError: + pass + + if vss_stores: + if len(vss_stores) == 1 and vss_stores[0] == 'all': + # We need to set the stores to cover all vss stores. + vss_stores = range(1, volume_system.number_of_volumes + 1) + + if not set(vss_stores).difference( + normalized_volume_identifiers): + return vss_stores + + print_header = True + while True: + if print_header: + self._output_writer.Write( + u'The following Volume Shadow Snapshots (VSS) were found:\n' + u'Identifier\tVSS store identifier\tCreation Time\n') + + for volume_identifier in volume_identifiers: + volume = volume_system.GetVolumeByIdentifier(volume_identifier) + if not volume: + raise errors.SourceScannerError( + u'Volume missing for identifier: {0:s}.'.format( + volume_identifier)) + + vss_identifier = volume.GetAttribute('identifier') + vss_creation_time = volume.GetAttribute('creation_time') + vss_creation_time = timelib.Timestamp.FromFiletime( + vss_creation_time.value) + vss_creation_time = timelib.Timestamp.CopyToIsoFormat( + vss_creation_time) + self._output_writer.Write(u'{0:s}\t\t{1:s}\t{2:s}\n'.format( + volume.identifier, vss_identifier.value, vss_creation_time)) + + self._output_writer.Write(u'\n') + + print_header = False + + self._output_writer.Write( + u'Please specify the identifier(s) of the VSS that should be ' + u'processed:\nNote that a range of stores can be defined as: 3..5. ' + u'Multiple stores can\nbe defined as: 1,3,5 (a list of comma ' + u'separated values). Ranges and lists can\nalso be combined ' + u'as: 1,3..5. The first store is 1. If no stores are specified\n' + u'none will be processed. You can abort with Ctrl^C.\n') + + selected_vss_stores = self._input_reader.Read() + + selected_vss_stores = selected_vss_stores.strip() + if not selected_vss_stores: + break + + try: + selected_vss_stores = self._ParseVssStores(selected_vss_stores) + except errors.BadConfigOption: + selected_vss_stores = [] + + if not set(selected_vss_stores).difference(normalized_volume_identifiers): + break + + self._output_writer.Write( + u'\n' + u'Unsupported VSS identifier(s), please try again or abort with ' + u'Ctrl^C.\n' + u'\n') + + return selected_vss_stores + + def _ParseVssStores(self, vss_stores): + """Parses the user specified VSS stores stirng. + + Args: + vss_stores: a string containing the VSS stores. + Where 1 represents the first store. + + Returns: + The list of VSS stores. + + Raises: + BadConfigOption: if the VSS stores option is invalid. + """ + if not vss_stores: + return [] + + if vss_stores == 'all': + # We want to process all the VSS stores. + return ['all'] + + stores = [] + for vss_store_range in vss_stores.split(','): + # Determine if the range is formatted as 1..3 otherwise it indicates + # a single store number. + if '..' 
in vss_store_range: + first_store, last_store = vss_store_range.split('..') + try: + first_store = int(first_store, 10) + last_store = int(last_store, 10) + except ValueError: + raise errors.BadConfigOption( + u'Invalid VSS store range: {0:s}.'.format(vss_store_range)) + + for store_number in range(first_store, last_store + 1): + if store_number not in stores: + stores.append(store_number) + else: + try: + store_number = int(vss_store_range, 10) + except ValueError: + raise errors.BadConfigOption( + u'Invalid VSS store range: {0:s}.'.format(vss_store_range)) + + if store_number not in stores: + stores.append(store_number) + + return sorted(stores) + + def AddImageOptions(self, argument_group): + """Adds the storage media image options to the argument group. + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + """ + argument_group.add_argument( + '-o', '--offset', dest='image_offset', action='store', default=None, + type=int, help=( + u'The offset of the volume within the storage media image in ' + u'number of sectors. A sector is 512 bytes in size by default ' + u'this can be overwritten with the --sector_size option.')) + + argument_group.add_argument( + '--sector_size', '--sector-size', dest='bytes_per_sector', + action='store', type=int, default=512, help=( + u'The number of bytes per sector, which is 512 by default.')) + + argument_group.add_argument( + '--ob', '--offset_bytes', '--offset_bytes', dest='image_offset_bytes', + action='store', default=None, type=int, help=( + u'The offset of the volume within the storage media image in ' + u'number of bytes.')) + + def AddVssProcessingOptions(self, argument_group): + """Adds the VSS processing options to the argument group. + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + """ + argument_group.add_argument( + '--no_vss', '--no-vss', dest='no_vss', action='store_true', + default=False, help=( + u'Do not scan for Volume Shadow Snapshots (VSS). This means that ' + u'VSS information will not be included in the extraction phase.')) + + argument_group.add_argument( + '--vss_stores', '--vss-stores', dest='vss_stores', action='store', + type=str, default=None, help=( + u'Define Volume Shadow Snapshots (VSS) (or stores that need to be ' + u'processed. A range of stores can be defined as: \'3..5\'. ' + u'Multiple stores can be defined as: \'1,3,5\' (a list of comma ' + u'separated values). Ranges and lists can also be combined as: ' + u'\'1,3..5\'. The first store is 1.')) + + # TODO: remove this when support to handle multiple partitions is added. + def GetSourcePathSpec(self): + """Retrieves the source path specification. + + Returns: + The source path specification (instance of dfvfs.PathSpec) or None. + """ + if self._scan_context and self._scan_context.last_scan_node: + return self._scan_context.last_scan_node.path_spec + + def ParseOptions(self, options, source_option='source'): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + source_option: optional name of the source option. The default is source. + + Raises: + BadConfigOption: if the options are invalid. 
+ """ + if not options: + raise errors.BadConfigOption(u'Missing options.') + + self._source_path = getattr(options, source_option, None) + if not self._source_path: + raise errors.BadConfigOption(u'Missing source path.') + + if isinstance(self._source_path, str): + encoding = sys.stdin.encoding + + # Note that sys.stdin.encoding can be None. + if not encoding: + encoding = self.preferred_encoding + + # Note that the source path option can be an encoded byte string + # and we need to turn it into an Unicode string. + try: + self._source_path = unicode( + self._source_path.decode(encoding)) + except UnicodeDecodeError as exception: + raise errors.BadConfigOption(( + u'Unable to convert source path to Unicode with error: ' + u'{0:s}.').format(exception)) + + elif not isinstance(self._source_path, unicode): + raise errors.BadConfigOption( + u'Unsupported source path, string type required.') + + self._source_path = os.path.abspath(self._source_path) + + def ScanSource(self, options): + """Scans the source path for volume and file systems. + + This functions sets the internal source path specification and source + type values. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Raises: + SourceScannerError: if the format of or within the source + is not supported or the the scan context + is invalid. + """ + partition_number = getattr(options, 'partition_number', None) + if (partition_number is not None and + isinstance(partition_number, basestring)): + try: + partition_number = int(partition_number, 10) + except ValueError: + logging.warning(u'Invalid partition number: {0:s}.'.format( + partition_number)) + partition_number = None + + partition_offset = getattr(options, 'image_offset_bytes', None) + if (partition_offset is not None and + isinstance(partition_offset, basestring)): + try: + partition_offset = int(partition_offset, 10) + except ValueError: + logging.warning(u'Invalid image offset bytes: {0:s}.'.format( + partition_offset)) + partition_offset = None + + if partition_offset is None and hasattr(options, 'image_offset'): + image_offset = getattr(options, 'image_offset') + bytes_per_sector = getattr(options, 'bytes_per_sector', 512) + + if isinstance(image_offset, basestring): + try: + image_offset = int(image_offset, 10) + except ValueError: + logging.warning(u'Invalid image offset: {0:s}.'.format(image_offset)) + image_offset = None + + if isinstance(bytes_per_sector, basestring): + try: + bytes_per_sector = int(bytes_per_sector, 10) + except ValueError: + logging.warning(u'Invalid bytes per sector: {0:s}.'.format( + bytes_per_sector)) + bytes_per_sector = 512 + + if image_offset: + partition_offset = image_offset * bytes_per_sector + + self._process_vss = not getattr(options, 'no_vss', False) + if self._process_vss: + vss_stores = getattr(options, 'vss_stores', None) + if vss_stores: + vss_stores = self._ParseVssStores(vss_stores) + + # Note that os.path.exists() does not support Windows device paths. + if (not self._source_path.startswith('\\\\.\\') and + not os.path.exists(self._source_path)): + raise errors.SourceScannerError( + u'No such device, file or directory: {0:s}.'.format( + self._source_path)) + + # Use the dfVFS source scanner to do the actual scanning. 
+ scan_path_spec = None + + self._scan_context.OpenSourcePath(self._source_path) + + while True: + last_scan_node = self._scan_context.last_scan_node + try: + self._scan_context = self._source_scanner.Scan( + self._scan_context, scan_path_spec=scan_path_spec) + except dfvfs_errors.BackEndError as exception: + raise errors.SourceScannerError( + u'Unable to scan source, with error: {0:s}'.format(exception)) + + # The source is a directory or file. + if self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_DIRECTORY, + self._scan_context.SOURCE_TYPE_FILE]: + break + + if (not self._scan_context.last_scan_node or + self._scan_context.last_scan_node == last_scan_node): + raise errors.SourceScannerError( + u'No supported file system found in source: {0:s}.'.format( + self._source_path)) + + # The source scanner found a file system. + if self._scan_context.last_scan_node.type_indicator in [ + dfvfs_definitions.TYPE_INDICATOR_TSK]: + break + + # The source scanner found a BitLocker encrypted volume and we need + # a credential to unlock the volume. + if self._scan_context.last_scan_node.type_indicator in [ + dfvfs_definitions.TYPE_INDICATOR_BDE]: + # TODO: ask for password. + raise errors.SourceScannerError( + u'BitLocker encrypted volume not yet supported.') + + # The source scanner found a partition table and we need to determine + # which partition needs to be processed. + elif self._scan_context.last_scan_node.type_indicator in [ + dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION]: + scan_node = self._GetVolumeTSKPartition( + self._scan_context, partition_number=partition_number, + partition_offset=partition_offset) + if not scan_node: + break + self._scan_context.last_scan_node = scan_node + + self._partition_offset = getattr(scan_node.path_spec, 'start_offset', 0) + + elif self._scan_context.last_scan_node.type_indicator in [ + dfvfs_definitions.TYPE_INDICATOR_VSHADOW]: + if self._process_vss: + self._GetVolumeVssStoreIdentifiers( + self._scan_context, vss_stores=vss_stores) + + # Get the scan node of the current volume. + scan_node = self._scan_context.last_scan_node.GetSubNodeByLocation(u'/') + self._scan_context.last_scan_node = scan_node + break + + else: + raise errors.SourceScannerError( + u'Unsupported volume system found in source: {0:s}.'.format( + self._source_path)) + + self._source_type = self._scan_context.source_type + + if self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE, + self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]: + + if self._scan_context.last_scan_node.type_indicator not in [ + dfvfs_definitions.TYPE_INDICATOR_TSK]: + logging.warning( + u'Unsupported file system falling back to single file mode.') + self._source_type = self._scan_context.source_type + + elif self._partition_offset is None: + self._partition_offset = 0 + + +class ExtractionFrontend(StorageMediaFrontend): + """Class that implements an extraction front-end.""" + + _DEFAULT_PROFILING_SAMPLE_RATE = 1000 + + # Approximately 250 MB of queued items per worker. + _DEFAULT_QUEUE_SIZE = 125000 + + _EVENT_SERIALIZER_FORMAT_PROTO = u'proto' + _EVENT_SERIALIZER_FORMAT_JSON = u'json' + + def __init__(self, input_reader, output_writer): + """Initializes the front-end object. + + Args: + input_reader: the input reader (instance of FrontendInputReader). + The default is None which indicates to use the stdin + input reader. + output_writer: the output writer (instance of FrontendOutputWriter). 
+ The default is None which indicates to use the stdout + output writer. + """ + super(ExtractionFrontend, self).__init__(input_reader, output_writer) + self._buffer_size = 0 + self._collection_process = None + self._collector = None + self._debug_mode = False + self._enable_profiling = False + self._engine = None + self._filter_expression = None + self._filter_object = None + self._mount_path = None + self._number_of_worker_processes = 0 + self._old_preprocess = False + self._open_files = False + self._operating_system = None + self._output_module = None + self._parser_names = None + self._preprocess = False + self._profiling_sample_rate = self._DEFAULT_PROFILING_SAMPLE_RATE + self._queue_size = self._DEFAULT_QUEUE_SIZE + self._run_foreman = True + self._single_process_mode = False + self._show_worker_memory_information = False + self._storage_file_path = None + self._storage_serializer_format = self._EVENT_SERIALIZER_FORMAT_PROTO + self._timezone = pytz.utc + + def _CheckStorageFile(self, storage_file_path): + """Checks if the storage file path is valid. + + Args: + storage_file_path: The path of the storage file. + + Raises: + BadConfigOption: if the storage file path is invalid. + """ + if os.path.exists(storage_file_path): + if not os.path.isfile(storage_file_path): + raise errors.BadConfigOption( + u'Storage file: {0:s} already exists and is not a file.'.format( + storage_file_path)) + logging.warning(u'Appending to an already existing storage file.') + + dirname = os.path.dirname(storage_file_path) + if not dirname: + dirname = '.' + + # TODO: add a more thorough check to see if the storage file really is + # a plaso storage file. + + if not os.access(dirname, os.W_OK): + raise errors.BadConfigOption( + u'Unable to write to storage file: {0:s}'.format(storage_file_path)) + + # Note that this function is not called by the normal termination. + def _CleanUpAfterAbort(self): + """Signals the tool to stop running nicely after an abort.""" + if self._single_process_mode and self._debug_mode: + logging.warning(u'Running in debug mode, set up debugger.') + pdb.post_mortem() + return + + if self._collector: + logging.warning(u'Stopping collector.') + self._collector.SignalEndOfInput() + + if self._engine: + self._engine.SignalAbort() + + def _DebugPrintCollector(self, options): + """Prints debug information about the collector. + + Args: + options: the command line arguments (instance of argparse.Namespace). + """ + filter_file = getattr(options, 'file_filter', None) + if self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE, + self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]: + if filter_file: + logging.debug(u'Starting a collection on image with filter.') + else: + logging.debug(u'Starting a collection on image.') + + elif self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_DIRECTORY]: + if filter_file: + logging.debug(u'Starting a collection on directory with filter.') + else: + logging.debug(u'Starting a collection on directory.') + + elif self._scan_context.source_type == self._scan_context.SOURCE_TYPE_FILE: + logging.debug(u'Starting a collection on a single file.') + + else: + logging.warning(u'Unsupported source type.') + + # TODO: have the frontend fill collecton information gradually + # and set it as the last step of preprocessing? + def _PreprocessSetCollectionInformation(self, options, pre_obj): + """Sets the collection information as part of the preprocessing. 
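+
+    The collection information is stored with the preprocessing object and
+    records, among other things, the plaso version, the configured timezone,
+    the selected parsers and whether VSS stores were parsed.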
+ + Args: + options: the command line arguments (instance of argparse.Namespace). + pre_obj: the preprocess object (instance of PreprocessObject). + """ + collection_information = {} + + collection_information['version'] = plaso.GetVersion() + collection_information['configured_zone'] = self._timezone + collection_information['file_processed'] = self._source_path + collection_information['output_file'] = self._storage_file_path + collection_information['protobuf_size'] = self._buffer_size + collection_information['parser_selection'] = getattr( + options, 'parsers', '(no list set)') + collection_information['preferred_encoding'] = self.preferred_encoding + collection_information['time_of_run'] = timelib.Timestamp.GetNow() + + collection_information['parsers'] = self._parser_names + collection_information['preprocess'] = self._preprocess + + if self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_DIRECTORY]: + recursive = True + else: + recursive = False + collection_information['recursive'] = recursive + collection_information['debug'] = self._debug_mode + collection_information['vss parsing'] = bool(self._vss_stores) + + if self._filter_expression: + collection_information['filter'] = self._filter_expression + + filter_file = getattr(options, 'file_filter', None) + if filter_file: + if os.path.isfile(filter_file): + filters = [] + with open(filter_file, 'rb') as fh: + for line in fh: + filters.append(line.rstrip()) + collection_information['file_filter'] = ', '.join(filters) + + if self._operating_system: + collection_information['os_detected'] = self._operating_system + else: + collection_information['os_detected'] = 'N/A' + + if self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE, + self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]: + collection_information['method'] = 'imaged processed' + collection_information['image_offset'] = self._partition_offset + else: + collection_information['method'] = 'OS collection' + + if self._single_process_mode: + collection_information['runtime'] = 'single process mode' + else: + collection_information['runtime'] = 'multi process mode' + collection_information['workers'] = self._number_of_worker_processes + + pre_obj.collection_information = collection_information + + def _PreprocessSetParserFilter(self, options, pre_obj): + """Sets the parser filter as part of the preprocessing. + + Args: + options: the command line arguments (instance of argparse.Namespace). + pre_obj: The previously created preprocessing object (instance of + PreprocessObject) or None. + """ + # TODO: Make this more sane. Currently we are only checking against + # one possible version of Windows, and then making the assumption if + # that is not correct we default to Windows 7. Same thing with other + # OS's, no assumption or checks are really made there. + # Also this is done by default, and no way for the user to turn off + # this behavior, need to add a parameter to the frontend that takes + # care of overwriting this behavior. + + # TODO: refactor putting the filter into the options object. + # See if it can be passed in another way. + if not getattr(options, 'filter', None): + options.filter = u'' + + if not options.filter: + options.filter = u'' + + parser_filter_string = u'' + + # If no parser filter is set, let's use our best guess of the OS + # to build that list. 
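+    # Roughly: Windows XP and the Windows Server 2000/2003 family map to
+    # the 'winxp' preset, any other Windows version to 'win7', and a
+    # guessed OS of MacOSX or Linux to 'macosx' or 'linux' respectively.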
+    if not getattr(options, 'parsers', ''):
+      if hasattr(pre_obj, 'osversion'):
+        os_version = pre_obj.osversion.lower()
+        # TODO: Improve this detection; it should be more 'intelligent',
+        # since there are quite a lot of versions out there that would
+        # benefit from loading up the set of 'winxp' parsers.
+        if 'windows xp' in os_version:
+          parser_filter_string = 'winxp'
+        elif 'windows server 2000' in os_version:
+          parser_filter_string = 'winxp'
+        elif 'windows server 2003' in os_version:
+          parser_filter_string = 'winxp'
+        else:
+          parser_filter_string = 'win7'
+
+      if getattr(pre_obj, 'guessed_os', None):
+        if pre_obj.guessed_os == 'MacOSX':
+          parser_filter_string = u'macosx'
+        elif pre_obj.guessed_os == 'Linux':
+          parser_filter_string = 'linux'
+
+      if parser_filter_string:
+        options.parsers = parser_filter_string
+        logging.info(u'Parser filter expression changed to: {0:s}'.format(
+            options.parsers))
+
+  def _PreprocessSetTimezone(self, options, pre_obj):
+    """Sets the timezone as part of the preprocessing.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      pre_obj: The previously created preprocessing object (instance of
+               PreprocessObject) or None.
+    """
+    if hasattr(pre_obj, 'time_zone_str'):
+      logging.info(u'Setting timezone to: {0:s}'.format(pre_obj.time_zone_str))
+      try:
+        pre_obj.zone = pytz.timezone(pre_obj.time_zone_str)
+      except pytz.UnknownTimeZoneError:
+        if hasattr(options, 'zone'):
+          logging.warning((
+              u'Unable to automatically configure timezone, falling back '
+              u'to the user supplied one: {0:s}').format(self._timezone))
+          pre_obj.zone = self._timezone
+        else:
+          logging.warning(u'Timezone was not properly set, defaulting to UTC.')
+          pre_obj.zone = pytz.utc
+    else:
+      # TODO: shouldn't the user be able to always override the timezone
+      # detection? Or do we need an input sanitization function.
+      pre_obj.zone = self._timezone
+
+    if not getattr(pre_obj, 'zone', None):
+      pre_obj.zone = self._timezone
+
+  def _ProcessSourceMultiProcessMode(self, options):
+    """Processes the source in multi process mode.
+
+    Multiprocessing is used to start up separate processes.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+    """
+    # TODO: replace by an option.
+    start_collection_process = True
+
+    self._number_of_worker_processes = getattr(options, 'workers', 0)
+
+    logging.info(u'Starting extraction in multi process mode.')
+
+    self._engine = multi_process.MultiProcessEngine(
+        maximum_number_of_queued_items=self._queue_size)
+
+    self._engine.SetEnableDebugOutput(self._debug_mode)
+    self._engine.SetEnableProfiling(
+        self._enable_profiling,
+        profiling_sample_rate=self._profiling_sample_rate)
+    self._engine.SetOpenFiles(self._open_files)
+
+    if self._filter_object:
+      self._engine.SetFilterObject(self._filter_object)
+
+    if self._mount_path:
+      self._engine.SetMountPath(self._mount_path)
+
+    if self._text_prepend:
+      self._engine.SetTextPrepend(self._text_prepend)
+
+    # TODO: add support to handle multiple partitions.
+    self._engine.SetSource(
+        self.GetSourcePathSpec(), resolver_context=self._resolver_context)
+
+    logging.debug(u'Starting preprocessing.')
+    pre_obj = self.PreprocessSource(options)
+    logging.debug(u'Preprocessing done.')
+
+    # TODO: make sure parsers option is not set by preprocessing.
+    parser_filter_string = getattr(options, 'parsers', '')
+
+    self._parser_names = []
+    for _, parser_class in parsers_manager.ParsersManager.GetParsers(
+        parser_filter_string=parser_filter_string):
+      self._parser_names.append(parser_class.NAME)
+
+    self._PreprocessSetCollectionInformation(options, pre_obj)
+
+    if 'filestat' in self._parser_names:
+      include_directory_stat = True
+    else:
+      include_directory_stat = False
+
+    filter_file = getattr(options, 'file_filter', None)
+    if filter_file:
+      filter_find_specs = engine_utils.BuildFindSpecsFromFile(
+          filter_file, pre_obj=pre_obj)
+    else:
+      filter_find_specs = None
+
+    if start_collection_process:
+      resolver_context = context.Context()
+    else:
+      resolver_context = self._resolver_context
+
+    # TODO: create multi process collector.
+    self._collector = self._engine.CreateCollector(
+        include_directory_stat, vss_stores=self._vss_stores,
+        filter_find_specs=filter_find_specs, resolver_context=resolver_context)
+
+    self._DebugPrintCollector(options)
+
+    if self._output_module:
+      storage_writer = storage.BypassStorageWriter(
+          self._engine.storage_queue, self._storage_file_path,
+          output_module_string=self._output_module, pre_obj=pre_obj)
+    else:
+      storage_writer = storage.StorageFileWriter(
+          self._engine.storage_queue, self._storage_file_path,
+          buffer_size=self._buffer_size, pre_obj=pre_obj,
+          serializer_format=self._storage_serializer_format)
+
+    try:
+      self._engine.ProcessSource(
+          self._collector, storage_writer,
+          parser_filter_string=parser_filter_string,
+          number_of_extraction_workers=self._number_of_worker_processes,
+          have_collection_process=start_collection_process,
+          have_foreman_process=self._run_foreman,
+          show_memory_usage=self._show_worker_memory_information)
+
+    except KeyboardInterrupt:
+      self._CleanUpAfterAbort()
+      raise errors.UserAbort(u'Process source aborted.')
+
+  def _ProcessSourceSingleProcessMode(self, options):
+    """Processes the source in a single process.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+    """
+    logging.info(u'Starting extraction in single process mode.')
+
+    try:
+      self._StartSingleThread(options)
+    except Exception as exception:
+      # The tool should generally not be run in single process mode
+      # for reasons other than debugging. Hence the general error
+      # catching.
+      logging.error(u'An uncaught exception occurred: {0:s}.\n{1:s}'.format(
+          exception, traceback.format_exc()))
+      if self._debug_mode:
+        pdb.post_mortem()
+
+  def _StartSingleThread(self, options):
+    """Starts everything up in a single process.
+
+    This should not normally be used, since running the tool in a single
+    process buffers up everything into memory until the storage is called.
+
+    To be clear: this starts up the collection and completes it before
+    calling the worker that extracts all EventObjects and stores them in
+    memory. When that is all done, the storage function is called to drain
+    the buffer. Hence the tool's excessive use of memory in this mode,
+    which is why it is not suggested to be used except for debugging
+    reasons (and mostly to get into the debugger).
+
+    This is therefore mostly useful during debugging sessions for some
+    limited parsing.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+ """ + self._engine = single_process.SingleProcessEngine(self._queue_size) + self._engine.SetEnableDebugOutput(self._debug_mode) + self._engine.SetEnableProfiling( + self._enable_profiling, + profiling_sample_rate=self._profiling_sample_rate) + self._engine.SetOpenFiles(self._open_files) + + if self._filter_object: + self._engine.SetFilterObject(self._filter_object) + + if self._mount_path: + self._engine.SetMountPath(self._mount_path) + + if self._text_prepend: + self._engine.SetTextPrepend(self._text_prepend) + + # TODO: add support to handle multiple partitions. + self._engine.SetSource( + self.GetSourcePathSpec(), resolver_context=self._resolver_context) + + logging.debug(u'Starting preprocessing.') + pre_obj = self.PreprocessSource(options) + + logging.debug(u'Preprocessing done.') + + # TODO: make sure parsers option is not set by preprocessing. + parser_filter_string = getattr(options, 'parsers', '') + + self._parser_names = [] + for _, parser_class in parsers_manager.ParsersManager.GetParsers( + parser_filter_string=parser_filter_string): + self._parser_names.append(parser_class.NAME) + + self._PreprocessSetCollectionInformation(options, pre_obj) + + if 'filestat' in self._parser_names: + include_directory_stat = True + else: + include_directory_stat = False + + filter_file = getattr(options, 'file_filter', None) + if filter_file: + filter_find_specs = engine_utils.BuildFindSpecsFromFile( + filter_file, pre_obj=pre_obj) + else: + filter_find_specs = None + + self._collector = self._engine.CreateCollector( + include_directory_stat, vss_stores=self._vss_stores, + filter_find_specs=filter_find_specs, + resolver_context=self._resolver_context) + + self._DebugPrintCollector(options) + + if self._output_module: + storage_writer = storage.BypassStorageWriter( + self._engine.storage_queue, self._storage_file_path, + output_module_string=self._output_module, pre_obj=pre_obj) + else: + storage_writer = storage.StorageFileWriter( + self._engine.storage_queue, self._storage_file_path, + buffer_size=self._buffer_size, pre_obj=pre_obj, + serializer_format=self._storage_serializer_format) + + try: + self._engine.ProcessSource( + self._collector, storage_writer, + parser_filter_string=parser_filter_string) + + except KeyboardInterrupt: + self._CleanUpAfterAbort() + raise errors.UserAbort(u'Process source aborted.') + + finally: + self._resolver_context.Empty() + + def AddExtractionOptions(self, argument_group): + """Adds the extraction options to the argument group. + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + """ + argument_group.add_argument( + '--use_old_preprocess', '--use-old-preprocess', dest='old_preprocess', + action='store_true', default=False, help=( + u'Only used in conjunction when appending to a previous storage ' + u'file. When this option is used then a new preprocessing object ' + u'is not calculated and instead the last one that got added to ' + u'the storage file is used. This can be handy when parsing an ' + u'image that contains more than a single partition.')) + + def AddInformationalOptions(self, argument_group): + """Adds the informational options to the argument group. + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + """ + argument_group.add_argument( + '-d', '--debug', dest='debug', action='store_true', default=False, + help=( + u'Enable debug mode. 
Intended for troubleshooting parsing ' + u'issues.')) + + def AddPerformanceOptions(self, argument_group): + """Adds the performance options to the argument group. + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + """ + argument_group.add_argument( + '--buffer_size', '--buffer-size', '--bs', dest='buffer_size', + action='store', default=0, + help=u'The buffer size for the output (defaults to 196MiB).') + + argument_group.add_argument( + '--queue_size', '--queue-size', dest='queue_size', action='store', + default=0, help=( + u'The maximum number of queued items per worker ' + u'(defaults to {0:d})').format(self._DEFAULT_QUEUE_SIZE)) + + if worker.BaseEventExtractionWorker.SupportsProfiling(): + argument_group.add_argument( + '--profile', dest='enable_profiling', action='store_true', + default=False, help=( + u'Enable profiling of memory usage. Intended for ' + u'troubleshooting memory issues.')) + + argument_group.add_argument( + '--profile_sample_rate', '--profile-sample-rate', + dest='profile_sample_rate', action='store', default=0, help=( + u'The profile sample rate (defaults to a sample every {0:d} ' + u'files).').format(self._DEFAULT_PROFILING_SAMPLE_RATE)) + + def GetSourceFileSystemSearcher(self): + """Retrieves the file system searcher of the source. + + Returns: + The file system searcher object (instance of dfvfs.FileSystemSearcher). + """ + return self._engine.GetSourceFileSystemSearcher( + resolver_context=self._resolver_context) + + def ParseOptions(self, options, source_option='source'): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + source_option: optional name of the source option. The default is source. + + Raises: + BadConfigOption: if the options are invalid. + """ + super(ExtractionFrontend, self).ParseOptions( + options, source_option=source_option) + + self._buffer_size = getattr(options, 'buffer_size', 0) + if self._buffer_size: + # TODO: turn this into a generic function that supports more size + # suffixes both MB and MiB and also that does not allow m as a valid + # indicator for MiB since m represents milli not Mega. 
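A hedged sketch of the generic size parser this TODO asks for; parse_bytes and its suffix table are assumptions, not plaso code:

    def parse_bytes(size_string):
      """Parses '16MiB', '16MB' or '16777216' into a number of bytes."""
      suffixes = {
          'kb': 1000, 'mb': 1000 ** 2, 'gb': 1000 ** 3,
          'kib': 1024, 'mib': 1024 ** 2, 'gib': 1024 ** 3}
      string_lower = size_string.strip().lower()
      # Check the longer suffixes first so 'mib' is not mistaken for 'mb'.
      for suffix in sorted(suffixes, key=len, reverse=True):
        if string_lower.endswith(suffix):
          return int(string_lower[:-len(suffix)], 10) * suffixes[suffix]
      # A bare 'm' (or any other unknown suffix) raises ValueError here,
      # which is deliberate: m means milli, not mega.
      return int(string_lower, 10)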
+ try: + if self._buffer_size[-1].lower() == 'm': + self._buffer_size = int(self._buffer_size[:-1], 10) + self._buffer_size *= self._BYTES_IN_A_MIB + else: + self._buffer_size = int(self._buffer_size, 10) + except ValueError: + raise errors.BadConfigOption( + u'Invalid buffer size: {0:s}.'.format(self._buffer_size)) + + queue_size = getattr(options, 'queue_size', None) + if queue_size: + try: + self._queue_size = int(queue_size, 10) + except ValueError: + raise errors.BadConfigOption( + u'Invalid queue size: {0:s}.'.format(queue_size)) + + self._enable_profiling = getattr(options, 'enable_profiling', False) + + profile_sample_rate = getattr(options, 'profile_sample_rate', None) + if profile_sample_rate: + try: + self._profiling_sample_rate = int(profile_sample_rate, 10) + except ValueError: + raise errors.BadConfigOption( + u'Invalid profile sample rate: {0:s}.'.format(profile_sample_rate)) + + serializer_format = getattr( + options, 'serializer_format', self._EVENT_SERIALIZER_FORMAT_PROTO) + if serializer_format: + self.SetStorageSerializer(serializer_format) + + self._filter_expression = getattr(options, 'filter', None) + if self._filter_expression: + self._filter_object = pfilter.GetMatcher(self._filter_expression) + if not self._filter_object: + raise errors.BadConfigOption( + u'Invalid filter expression: {0:s}'.format(self._filter_expression)) + + filter_file = getattr(options, 'file_filter', None) + if filter_file and not os.path.isfile(filter_file): + raise errors.BadConfigOption( + u'No such collection filter file: {0:s}.'.format(filter_file)) + + self._debug_mode = getattr(options, 'debug', False) + + self._old_preprocess = getattr(options, 'old_preprocess', False) + + timezone_string = getattr(options, 'timezone', None) + if timezone_string: + self._timezone = pytz.timezone(timezone_string) + + self._single_process_mode = getattr( + options, 'single_process', False) + + self._output_module = getattr(options, 'output_module', None) + + self._operating_system = getattr(options, 'os', None) + self._open_files = getattr(options, 'open_files', False) + self._text_prepend = getattr(options, 'text_prepend', None) + + if self._operating_system: + self._mount_path = getattr(options, 'filename', None) + + def PreprocessSource(self, options): + """Preprocesses the source. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Returns: + The preprocessing object (instance of PreprocessObject). + """ + pre_obj = None + + if self._old_preprocess and os.path.isfile(self._storage_file_path): + # Check if the storage file contains a preprocessing object. 
+    try:
+      with storage.StorageFile(
+          self._storage_file_path, read_only=True) as store:
+        storage_information = store.GetStorageInformation()
+        if storage_information:
+          logging.info(u'Using preprocessing information from a prior run.')
+          pre_obj = storage_information[-1]
+          self._preprocess = False
+    except IOError:
+      logging.warning(u'Storage file does not exist, running preprocess.')
+
+    if self._preprocess and self._scan_context.source_type in [
+        self._scan_context.SOURCE_TYPE_DIRECTORY,
+        self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
+        self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
+      try:
+        self._engine.PreprocessSource(
+            self._operating_system, resolver_context=self._resolver_context)
+      except IOError as exception:
+        logging.error(u'Unable to preprocess with error: {0:s}'.format(
+            exception))
+        return
+
+    # TODO: Remove the need for direct access to the pre_obj in favor
+    # of the knowledge base.
+    pre_obj = getattr(self._engine.knowledge_base, '_pre_obj', None)
+
+    if not pre_obj:
+      pre_obj = event.PreprocessObject()
+
+    self._PreprocessSetTimezone(options, pre_obj)
+    self._PreprocessSetParserFilter(options, pre_obj)
+
+    return pre_obj
+
+  def PrintOptions(self, options, source_path):
+    """Prints the options.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      source_path: the source path.
+    """
+    self._output_writer.Write(u'\n')
+    self._output_writer.Write(
+        u'Source path\t\t\t\t: {0:s}\n'.format(source_path))
+
+    if self._scan_context.source_type in [
+        self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
+        self._scan_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
+      is_image = True
+    else:
+      is_image = False
+
+    self._output_writer.Write(
+        u'Is storage media image or device\t: {0!s}\n'.format(is_image))
+
+    if is_image:
+      image_offset_bytes = self._partition_offset
+      if isinstance(image_offset_bytes, basestring):
+        try:
+          image_offset_bytes = int(image_offset_bytes, 10)
+        except ValueError:
+          image_offset_bytes = 0
+      elif image_offset_bytes is None:
+        image_offset_bytes = 0
+
+      self._output_writer.Write(
+          u'Partition offset\t\t\t: {0:d} (0x{0:08x})\n'.format(
+              image_offset_bytes))
+
+      if self._process_vss and self._vss_stores:
+        self._output_writer.Write(
+            u'VSS stores\t\t\t\t: {0!s}\n'.format(self._vss_stores))
+
+    filter_file = getattr(options, 'file_filter', None)
+    if filter_file:
+      self._output_writer.Write(u'Filter file\t\t\t\t: {0:s}\n'.format(
+          filter_file))
+
+    self._output_writer.Write(u'\n')
+
+  def ProcessSource(self, options):
+    """Processes the source.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+
+    Raises:
+      SourceScannerError: if the source scanner could not find a supported
+                          file system.
+      UserAbort: if the user initiated an abort.
+    """
+    self.ScanSource(options)
+
+    self.PrintOptions(options, self._source_path)
+
+    if self._partition_offset is None:
+      self._preprocess = False
+
+    else:
+      # If we're dealing with a storage media image, always run
+      # pre-processing.
+      self._preprocess = True
+
+    self._CheckStorageFile(self._storage_file_path)
+
+    # No need to multi-process when we're only processing a single file.
+    if self._scan_context.source_type == self._scan_context.SOURCE_TYPE_FILE:
+      # If we are only processing a single file we don't need more than a
+      # single worker.
+      # TODO: Refactor this usage of the options object.
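For reference, the partition offset normalization performed in PrintOptions above condenses to this hedged helper; normalize_offset is illustrative only and not part of plaso:

    def normalize_offset(image_offset_bytes):
      # Strings are parsed as decimal; invalid or unset values become 0,
      # mirroring the in-place normalization in PrintOptions.
      if isinstance(image_offset_bytes, basestring):
        try:
          return int(image_offset_bytes, 10)
        except ValueError:
          return 0
      return image_offset_bytes or 0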
+ options.workers = 1 + self._single_process_mode = True + + if self._scan_context.source_type in [ + self._scan_context.SOURCE_TYPE_DIRECTORY]: + # If we are dealing with a directory we would like to attempt + # pre-processing. + self._preprocess = True + + if self._single_process_mode: + self._ProcessSourceSingleProcessMode(options) + else: + self._ProcessSourceMultiProcessMode(options) + + def SetStorageFile(self, storage_file_path): + """Sets the storage file path. + + Args: + storage_file_path: The path of the storage file. + """ + self._storage_file_path = storage_file_path + + def SetStorageSerializer(self, storage_serializer_format): + """Sets the storage serializer. + + Args: + storage_serializer_format: String denoting the type of serializer + to be used in the storage. The values + can be either "proto" or "json". + """ + if storage_serializer_format not in ( + self._EVENT_SERIALIZER_FORMAT_JSON, + self._EVENT_SERIALIZER_FORMAT_PROTO): + return + self._storage_serializer_format = storage_serializer_format + + def SetRunForeman(self, run_foreman=True): + """Sets a flag indicating whether the frontend should monitor workers. + + Args: + run_foreman: A boolean (defaults to true) that indicates whether or not + the frontend should start a foreman that monitors workers. + """ + self._run_foreman = run_foreman + + def SetShowMemoryInformation(self, show_memory=True): + """Sets a flag telling the worker monitor to show memory information. + + Args: + show_memory: A boolean (defaults to True) that indicates whether or not + the foreman should include memory information as part of + the worker monitoring. + """ + self._show_worker_memory_information = show_memory + + +class AnalysisFrontend(Frontend): + """Class that implements an analysis front-end.""" + + def __init__(self, input_reader, output_writer): + """Initializes the front-end object. + + Args: + input_reader: the input reader (instance of FrontendInputReader). + The default is None which indicates to use the stdin + input reader. + output_writer: the output writer (instance of FrontendOutputWriter). + The default is None which indicates to use the stdout + output writer. + """ + super(AnalysisFrontend, self).__init__(input_reader, output_writer) + + self._storage_file_path = None + + def AddStorageFileOptions(self, argument_group): + """Adds the storage file options to the argument group. + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup) or argument parser (instance of + argparse.ArgumentParser). + """ + argument_group.add_argument( + 'storage_file', metavar='STORAGE_FILE', action='store', nargs='?', + type=unicode, default=None, help='The path of the storage file.') + + def OpenStorageFile(self, read_only=True): + """Opens the storage file. + + Args: + read_only: Optional boolean value to indicate the storage file should + be opened in read-only mode. The default is True. + + Returns: + The storage file object (instance of StorageFile). + """ + return storage.StorageFile(self._storage_file_path, read_only=read_only) + + def ParseOptions(self, options): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Raises: + BadConfigOption: if the options are invalid. 
+ """ + if not options: + raise errors.BadConfigOption(u'Missing options.') + + self._storage_file_path = getattr(options, 'storage_file', None) + if not self._storage_file_path: + raise errors.BadConfigOption(u'Missing storage file.') + + if not os.path.isfile(self._storage_file_path): + raise errors.BadConfigOption( + u'No such storage file {0:s}.'.format(self._storage_file_path)) diff --git a/plaso/frontend/frontend_test.py b/plaso/frontend/frontend_test.py new file mode 100644 index 0000000..f03f360 --- /dev/null +++ b/plaso/frontend/frontend_test.py @@ -0,0 +1,279 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the front-end object.""" + +import os +import unittest + +from dfvfs.lib import definitions as dfvfs_definitions + +from plaso.frontend import frontend +from plaso.frontend import test_lib +from plaso.lib import errors +from plaso.lib import storage + + +class ExtractionFrontendTests(test_lib.FrontendTestCase): + """Tests for the extraction front-end object.""" + + def _TestScanSourceDirectory(self, test_file): + """Tests the ScanSource function on a directory. + + Args: + test_file: the path of the test file. + """ + test_front_end = frontend.ExtractionFrontend( + self._input_reader, self._output_writer) + + options = test_lib.Options() + options.source = test_file + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals(path_spec.location, os.path.abspath(test_file)) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_OS) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, None) + + def _TestScanSourceImage(self, test_file): + """Tests the ScanSource function on the test image. + + Args: + test_file: the path of the test file. + """ + test_front_end = frontend.ExtractionFrontend( + self._input_reader, self._output_writer) + + options = test_lib.Options() + options.source = test_file + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 0) + + def _TestScanSourcePartitionedImage(self, test_file): + """Tests the ScanSource function on the partitioned test image. + + Args: + test_file: the path of the test file. 
+ """ + test_front_end = frontend.ExtractionFrontend( + self._input_reader, self._output_writer) + + options = test_lib.Options() + options.source = test_file + options.image_offset_bytes = 0x0002c000 + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 180224) + + options = test_lib.Options() + options.source = test_file + options.image_offset = 352 + options.bytes_per_sector = 512 + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 180224) + + options = test_lib.Options() + options.source = test_file + options.partition_number = 2 + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 180224) + + def _TestScanSourceVssImage(self, test_file): + """Tests the ScanSource function on the VSS test image. + + Args: + test_file: the path of the test file. + """ + test_front_end = frontend.ExtractionFrontend( + self._input_reader, self._output_writer) + + options = test_lib.Options() + options.source = test_file + options.vss_stores = '1,2' + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 0) + self.assertEquals(test_front_end._vss_stores, [1, 2]) + + options = test_lib.Options() + options.source = test_file + options.vss_stores = '1' + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 0) + self.assertEquals(test_front_end._vss_stores, [1]) + + options = test_lib.Options() + options.source = test_file + options.vss_stores = 'all' + + test_front_end.ParseOptions(options) + + test_front_end.ScanSource(options) + path_spec = test_front_end.GetSourcePathSpec() + self.assertNotEquals(path_spec, None) + self.assertEquals( + path_spec.type_indicator, dfvfs_definitions.TYPE_INDICATOR_TSK) + # pylint: disable=protected-access + self.assertEquals(test_front_end._partition_offset, 0) + self.assertEquals(test_front_end._vss_stores, [1, 2]) + + def setUp(self): + """Sets up the objects used throughout the test.""" + self._input_reader = frontend.StdinFrontendInputReader() + self._output_writer = frontend.StdoutFrontendOutputWriter() + + def testParseOptions(self): + """Tests the parse options function.""" + test_front_end = frontend.ExtractionFrontend( + 
self._input_reader, self._output_writer) + + options = test_lib.Options() + + with self.assertRaises(errors.BadConfigOption): + test_front_end.ParseOptions(options) + + options.source = self._GetTestFilePath([u'ímynd.dd']) + + test_front_end.ParseOptions(options) + + def testScanSource(self): + """Tests the ScanSource function.""" + test_file = self._GetTestFilePath([u'tsk_volume_system.raw']) + self._TestScanSourcePartitionedImage(test_file) + + test_file = self._GetTestFilePath([u'image-split.E01']) + self._TestScanSourcePartitionedImage(test_file) + + test_file = self._GetTestFilePath([u'image.E01']) + self._TestScanSourceImage(test_file) + + test_file = self._GetTestFilePath([u'image.qcow2']) + self._TestScanSourceImage(test_file) + + test_file = self._GetTestFilePath([u'vsstest.qcow2']) + self._TestScanSourceVssImage(test_file) + + test_file = self._GetTestFilePath([u'text_parser']) + self._TestScanSourceDirectory(test_file) + + test_file = self._GetTestFilePath([u'image.vhd']) + self._TestScanSourceImage(test_file) + + test_file = self._GetTestFilePath([u'image.vmdk']) + self._TestScanSourceImage(test_file) + + with self.assertRaises(errors.SourceScannerError): + test_file = self._GetTestFilePath(['nosuchfile.raw']) + self._TestScanSourceImage(test_file) + + +class AnalysisFrontendTests(test_lib.FrontendTestCase): + """Tests for the analysis front-end object.""" + + def setUp(self): + """Sets up the objects used throughout the test.""" + self._input_reader = frontend.StdinFrontendInputReader() + self._output_writer = frontend.StdoutFrontendOutputWriter() + + def testOpenStorageFile(self): + """Tests the open storage file function.""" + test_front_end = frontend.AnalysisFrontend( + self._input_reader, self._output_writer) + + options = test_lib.Options() + options.storage_file = self._GetTestFilePath([u'psort_test.out']) + + test_front_end.ParseOptions(options) + storage_file = test_front_end.OpenStorageFile() + + self.assertIsInstance(storage_file, storage.StorageFile) + + storage_file.Close() + + def testParseOptions(self): + """Tests the parse options function.""" + test_front_end = frontend.AnalysisFrontend( + self._input_reader, self._output_writer) + + options = test_lib.Options() + + with self.assertRaises(errors.BadConfigOption): + test_front_end.ParseOptions(options) + + options.storage_file = self._GetTestFilePath([u'no_such_file.out']) + + with self.assertRaises(errors.BadConfigOption): + test_front_end.ParseOptions(options) + + options.storage_file = self._GetTestFilePath([u'psort_test.out']) + + test_front_end.ParseOptions(options) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/frontend/image_export.py b/plaso/frontend/image_export.py new file mode 100755 index 0000000..36c56da --- /dev/null +++ b/plaso/frontend/image_export.py @@ -0,0 +1,700 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""The image export front-end.""" + +import argparse +import collections +import hashlib +import logging +import os +import sys + +from dfvfs.helpers import file_system_searcher +from dfvfs.lib import definitions as dfvfs_definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.artifacts import knowledge_base +from plaso.engine import collector +from plaso.engine import utils as engine_utils +from plaso.engine import queue +from plaso.engine import single_process +from plaso.frontend import frontend +from plaso.frontend import utils as frontend_utils +from plaso.lib import errors +from plaso.lib import timelib +from plaso.preprocessors import interface as preprocess_interface +from plaso.preprocessors import manager as preprocess_manager + + +def CalculateHash(file_object): + """Return a hash for a given file object.""" + md5 = hashlib.md5() + file_object.seek(0) + + data = file_object.read(4098) + while data: + md5.update(data) + data = file_object.read(4098) + + return md5.hexdigest() + + +class DateFilter(object): + """Class that implements a date filter for file entries.""" + + DATE_FILTER_INSTANCE = collections.namedtuple( + 'date_filter_instance', 'type start end') + + DATE_FILTER_TYPES = frozenset([ + u'atime', u'bkup', u'ctime', u'crtime', u'dtime', u'mtime']) + + def __init__(self): + """Initialize the date filter object.""" + super(DateFilter, self).__init__() + self._filters = [] + + @property + def number_of_filters(self): + """Return back the filter count.""" + return len(self._filters) + + def Add(self, filter_type, filter_start=None, filter_end=None): + """Add a date filter. + + Args: + filter_type: String that defines what timestamp is affected by the + date filter, valid values are atime, ctime, crtime, + dtime, bkup and mtime. + filter_start: Optional start date of the filter. This is a string + in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD". + If not supplied there will be no limitation to the initial + timeframe. + filter_end: Optional end date of the filter. This is a string + in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD". + If not supplied there will be no limitation to the initial + timeframe. + + Raises: + errors.WrongFilterOption: If the filter is badly formed. + """ + if not isinstance(filter_type, basestring): + raise errors.WrongFilterOption(u'Filter type must be a string.') + + if filter_start is None and filter_end is None: + raise errors.WrongFilterOption( + u'A date filter has to have either a start or an end date.') + + filter_type_lower = filter_type.lower() + if filter_type_lower not in self.DATE_FILTER_TYPES: + raise errors.WrongFilterOption(u'Unknown filter type: {0:s}.'.format( + filter_type)) + + date_filter_type = filter_type_lower + date_filter_start = None + date_filter_end = None + + if filter_start is not None: + # If the date string is invalid the timestamp will be set to zero, + # which is also a valid date. Thus all invalid timestamp strings + # will be set to filter from the POSIX epoch time. + # Thus the actual value of the filter is printed out so that the user + # may catch this potentially unwanted behavior. 
+      date_filter_start = timelib.Timestamp.FromTimeString(filter_start)
+      logging.info(
+          u'Date filter for start date configured: [{0:s}] {1:s}'.format(
+              date_filter_type,
+              timelib.Timestamp.CopyToIsoFormat(date_filter_start)))
+
+    if filter_end is not None:
+      date_filter_end = timelib.Timestamp.FromTimeString(filter_end)
+      logging.info(
+          u'Date filter for end date configured: [{0:s}] {1:s}'.format(
+              date_filter_type,
+              timelib.Timestamp.CopyToIsoFormat(date_filter_end)))
+
+      # Make sure that the end timestamp occurs after the beginning.
+      # If not then we need to reverse the time range.
+      if (date_filter_start is not None and
+          date_filter_start > date_filter_end):
+        date_filter_start, date_filter_end = (
+            date_filter_end, date_filter_start)
+
+    self._filters.append(self.DATE_FILTER_INSTANCE(
+        date_filter_type, date_filter_start, date_filter_end))
+
+  def CompareFileEntry(self, file_entry):
+    """Compare the set date filters against timestamps of a file entry.
+
+    Args:
+      file_entry: The file entry (instance of dfvfs.FileEntry).
+
+    Returns:
+      True if there are no date filters set. Otherwise the date filters are
+      compared and True is only returned if the timestamps are within the
+      time range.
+
+    Raises:
+      errors.WrongFilterOption: If an attempt is made to filter against
+                                a date type that is not stored in the stat
+                                object.
+    """
+    if not self._filters:
+      return True
+
+    # Compare timestamps of the file entry.
+    stat = file_entry.GetStat()
+
+    # Go over each filter.
+    for date_filter in self._filters:
+      posix_time = getattr(stat, date_filter.type, None)
+
+      if posix_time is None:
+        # Trying to filter against a date type that is not saved in the stat
+        # object.
+        raise errors.WrongFilterOption(
+            u'Date type: {0:s} is not stored in the file entry'.format(
+                date_filter.type))
+
+      timestamp = timelib.Timestamp.FromPosixTime(posix_time)
+
+      if date_filter.start is not None and (timestamp < date_filter.start):
+        logging.debug((
+            u'[skipping] Not saving file: {0:s}, timestamp out of '
+            u'range.').format(file_entry.path_spec.location))
+        return False
+
+      if date_filter.end is not None and (timestamp > date_filter.end):
+        logging.debug((
+            u'[skipping] Not saving file: {0:s}, timestamp out of '
+            u'range.').format(file_entry.path_spec.location))
+        return False
+
+    return True
+
+  def Remove(self, filter_type, filter_start=None, filter_end=None):
+    """Remove a date filter from the set of defined date filters.
+
+    Args:
+      filter_type: String that defines what timestamp is affected by the
+                   date filter, valid values are atime, ctime, crtime,
+                   dtime, bkup and mtime.
+      filter_start: Optional start date of the filter. This is a string
+                    in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD".
+                    If not supplied there will be no limitation to the
+                    initial timeframe.
+      filter_end: Optional end date of the filter. This is a string
+                  in the form of "YYYY-MM-DD HH:MM:SS", or "YYYY-MM-DD".
+                  If not supplied there will be no limitation to the
+                  initial timeframe.
+    """
+    if not self._filters:
+      return
+
+    # TODO: Instead of doing it this way, calculate a hash for every stored
+    # filter and use that for removals.
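One way to realize the TODO above: key each filter by its defining (type, start, end) tuple so removal becomes a single dict deletion. This is an illustrative sketch, not plaso code; the timestamp normalization that Add() performs is omitted:

    class KeyedDateFilters(object):
      """Date filter store keyed by the filter's defining tuple."""

      def __init__(self):
        self._filters = {}

      def Add(self, filter_type, filter_start=None, filter_end=None):
        key = (filter_type, filter_start, filter_end)
        self._filters[key] = key

      def Remove(self, filter_type, filter_start=None, filter_end=None):
        # Removing a non-existent filter is a harmless no-op.
        self._filters.pop((filter_type, filter_start, filter_end), None)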
+ for date_filter_index, date_filter in enumerate(self._filters): + if filter_start is None: + date_filter_start = filter_start + else: + date_filter_start = timelib.Timestamp.FromTimeString(filter_start) + if filter_end is None: + date_filter_end = filter_end + else: + date_filter_end = timelib.Timestamp.FromTimeString(filter_end) + + if (date_filter.type == filter_type and + date_filter.start == date_filter_start and + date_filter.end == date_filter_end): + del self._filters[date_filter_index] + return + + def Reset(self): + """Resets the date filter.""" + self._filters = [] + + +class FileSaver(object): + """A simple class that is used to save files.""" + + md5_dict = {} + calc_md5 = False + # TODO: Move this functionality into the frontend as a state attribute. + _date_filter = None + + @classmethod + def SetDateFilter(cls, date_filter): + """Set a date filter for the file saver. + + If a date filter is set files will not be saved unless they are within + the time boundaries. + + Args: + date_filter: A date filter object (instance of DateFilter). + """ + cls._date_filter = date_filter + + @classmethod + def WriteFile(cls, source_path_spec, destination_path, filename_prefix=''): + """Writes the contents of the source to the destination file. + + Args: + source_path_spec: the path specification of the source file. + destination_path: the path of the destination file. + filename_prefix: optional prefix for the filename. The default is an + empty string. + """ + file_entry = path_spec_resolver.Resolver.OpenFileEntry(source_path_spec) + directory = u'' + filename = getattr(source_path_spec, 'location', None) + if not filename: + filename = source_path_spec.file_path + + # There will be issues on systems that use a different separator than a + # forward slash. However a forward slash is always used in the pathspec. + if os.path.sep != u'/': + filename = filename.replace(u'/', os.path.sep) + + if os.path.sep in filename: + directory_string, _, filename = filename.rpartition(os.path.sep) + if directory_string: + directory = os.path.join( + destination_path, *directory_string.split(os.path.sep)) + + if filename_prefix: + extracted_filename = u'{0:s}_{1:s}'.format(filename_prefix, filename) + else: + extracted_filename = filename + + while extracted_filename.startswith(os.path.sep): + extracted_filename = extracted_filename[1:] + + if directory: + if not os.path.isdir(directory): + os.makedirs(directory) + else: + directory = destination_path + + if cls.calc_md5: + stat = file_entry.GetStat() + inode = getattr(stat, 'ino', 0) + file_object = file_entry.GetFileObject() + md5sum = CalculateHash(file_object) + if inode in cls.md5_dict: + if md5sum in cls.md5_dict[inode]: + return + cls.md5_dict[inode].append(md5sum) + else: + cls.md5_dict[inode] = [md5sum] + + # Check if we do not want to save the file. + if cls._date_filter and not cls._date_filter.CompareFileEntry(file_entry): + return + + try: + file_object = file_entry.GetFileObject() + frontend_utils.OutputWriter.WriteFile( + file_object, os.path.join(directory, extracted_filename)) + except IOError as exception: + logging.error( + u'[skipping] unable to save file: {0:s} with error: {1:s}'.format( + filename, exception)) + + +class ImageExtractorQueueConsumer(queue.ItemQueueConsumer): + """Class that implements an image extractor queue consumer.""" + + def __init__(self, process_queue, extensions, destination_path): + """Initializes the image extractor queue consumer. + + Args: + process_queue: the process queue (instance of Queue). 
+ extensions: a list of extensions. + destination_path: the path where the extracted files should be stored. + """ + super(ImageExtractorQueueConsumer, self).__init__(process_queue) + self._destination_path = destination_path + self._extensions = extensions + + def _ConsumeItem(self, path_spec): + """Consumes an item callback for ConsumeItems. + + Args: + path_spec: a path specification (instance of dfvfs.PathSpec). + """ + # TODO: move this into a function of path spec e.g. GetExtension(). + location = getattr(path_spec, 'location', None) + if not location: + location = path_spec.file_path + _, _, extension = location.rpartition('.') + if extension.lower() in self._extensions: + vss_store_number = getattr(path_spec, 'vss_store_number', None) + if vss_store_number is not None: + filename_prefix = 'vss_{0:d}'.format(vss_store_number + 1) + else: + filename_prefix = '' + + FileSaver.WriteFile( + path_spec, self._destination_path, filename_prefix=filename_prefix) + + +class ImageExportFrontend(frontend.StorageMediaFrontend): + """Class that implements the image export front-end.""" + + def __init__(self): + """Initializes the front-end object.""" + input_reader = frontend.StdinFrontendInputReader() + output_writer = frontend.StdoutFrontendOutputWriter() + + super(ImageExportFrontend, self).__init__(input_reader, output_writer) + + self._knowledge_base = None + self._remove_duplicates = True + self._source_path_spec = None + + # TODO: merge with collector and/or engine. + def _ExtractWithExtensions(self, extensions, destination_path): + """Extracts files using extensions. + + Args: + extensions: a list of extensions. + destination_path: the path where the extracted files should be stored. + """ + logging.info(u'Finding files with extensions: {0:s}'.format(extensions)) + + if not os.path.isdir(destination_path): + os.makedirs(destination_path) + + input_queue = single_process.SingleProcessQueue() + + # TODO: add support to handle multiple partitions. + self._source_path_spec = self.GetSourcePathSpec() + + image_collector = collector.Collector( + input_queue, self._source_path, self._source_path_spec) + + image_collector.Collect() + + FileSaver.calc_md5 = self._remove_duplicates + + input_queue_consumer = ImageExtractorQueueConsumer( + input_queue, extensions, destination_path) + input_queue_consumer.ConsumeItems() + + # TODO: merge with collector and/or engine. + def _ExtractWithFilter(self, filter_file_path, destination_path): + """Extracts files using a filter expression. + + This method runs the file extraction process on the image and + potentially on every VSS if that is wanted. + + Args: + filter_file_path: The path of the file that contains the filter + expressions. + destination_path: The path where the extracted files should be stored. + """ + # TODO: add support to handle multiple partitions. + self._source_path_spec = self.GetSourcePathSpec() + + searcher = self._GetSourceFileSystemSearcher( + resolver_context=self._resolver_context) + + if self._knowledge_base is None: + self._Preprocess(searcher) + + if not os.path.isdir(destination_path): + os.makedirs(destination_path) + + find_specs = engine_utils.BuildFindSpecsFromFile( + filter_file_path, pre_obj=self._knowledge_base.pre_obj) + + # Save the regular files. 
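For reference, the duplicate suppression that the calc_md5 flag (set just below) enables in FileSaver.WriteFile reduces to this sketch; seen_hashes stands in for FileSaver.md5_dict and is illustrative only:

    seen_hashes = {}


    def is_duplicate(inode, md5sum):
      """Returns True if this inode/content-hash pair was already written."""
      hashes = seen_hashes.setdefault(inode, [])
      if md5sum in hashes:
        return True
      hashes.append(md5sum)
      return False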
+ FileSaver.calc_md5 = self._remove_duplicates + + for path_spec in searcher.Find(find_specs=find_specs): + FileSaver.WriteFile(path_spec, destination_path) + + if self._process_vss and self._vss_stores: + volume_path_spec = self._source_path_spec.parent + + logging.info(u'Extracting files from VSS.') + vss_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_VSHADOW, location=u'/', + parent=volume_path_spec) + + vss_file_entry = path_spec_resolver.Resolver.OpenFileEntry(vss_path_spec) + + number_of_vss = vss_file_entry.number_of_sub_file_entries + + # In plaso 1 represents the first store index in dfvfs and pyvshadow 0 + # represents the first store index so 1 is subtracted. + vss_store_range = [store_nr - 1 for store_nr in self._vss_stores] + + for store_index in vss_store_range: + logging.info(u'Extracting files from VSS {0:d} out of {1:d}'.format( + store_index + 1, number_of_vss)) + + vss_path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_VSHADOW, store_index=store_index, + parent=volume_path_spec) + path_spec = path_spec_factory.Factory.NewPathSpec( + dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/', + parent=vss_path_spec) + + filename_prefix = 'vss_{0:d}'.format(store_index) + + file_system = path_spec_resolver.Resolver.OpenFileSystem( + path_spec, resolver_context=self._resolver_context) + searcher = file_system_searcher.FileSystemSearcher( + file_system, vss_path_spec) + + for path_spec in searcher.Find(find_specs=find_specs): + FileSaver.WriteFile( + path_spec, destination_path, filename_prefix=filename_prefix) + + # TODO: refactor, this is a duplicate of the function in engine. + def _GetSourceFileSystemSearcher(self, resolver_context=None): + """Retrieves the file system searcher of the source. + + Args: + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. Note that every thread or process + must have its own resolver context. + + Returns: + The file system searcher object (instance of dfvfs.FileSystemSearcher). + + Raises: + RuntimeError: if source path specification is not set. + """ + if not self._source_path_spec: + raise RuntimeError(u'Missing source.') + + file_system = path_spec_resolver.Resolver.OpenFileSystem( + self._source_path_spec, resolver_context=resolver_context) + + type_indicator = self._source_path_spec.type_indicator + if type_indicator == dfvfs_definitions.TYPE_INDICATOR_OS: + mount_point = self._source_path_spec + else: + mount_point = self._source_path_spec.parent + + return file_system_searcher.FileSystemSearcher(file_system, mount_point) + + def _Preprocess(self, searcher): + """Preprocesses the image. + + Args: + searcher: The file system searcher object (instance of + dfvfs.FileSystemSearcher). + """ + if self._knowledge_base is not None: + return + + self._knowledge_base = knowledge_base.KnowledgeBase() + + logging.info(u'Guessing OS') + + platform = preprocess_interface.GuessOS(searcher) + logging.info(u'OS: {0:s}'.format(platform)) + + logging.info(u'Running preprocess.') + + preprocess_manager.PreprocessPluginsManager.RunPlugins( + platform, searcher, self._knowledge_base) + + logging.info(u'Preprocess done, saving files from image.') + + def ParseOptions(self, options, source_option='source'): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + source_option: optional name of the source option. The default is source. 
+
+    Raises:
+      BadConfigOption: if the options are invalid.
+    """
+    super(ImageExportFrontend, self).ParseOptions(
+        options, source_option=source_option)
+
+    filter_file = getattr(options, 'filter', None)
+    if not filter_file and not getattr(options, 'extension_string', None):
+      raise errors.BadConfigOption(
+          u'Neither an extension string nor a filter is defined.')
+
+    if filter_file and not os.path.isfile(filter_file):
+      raise errors.BadConfigOption(
+          u'Unable to proceed, filter file: {0:s} does not exist.'.format(
+              filter_file))
+
+    if (getattr(options, 'no_vss', False) or
+        getattr(options, 'include_duplicates', False)):
+      self._remove_duplicates = False
+
+    # Process date filter.
+    date_filters = getattr(options, 'date_filters', [])
+    if date_filters:
+      date_filter_object = DateFilter()
+
+      for date_filter in date_filters:
+        date_filter_pieces = date_filter.split(',')
+        if len(date_filter_pieces) != 3:
+          raise errors.BadConfigOption(
+              u'Date filter badly formed: {0:s}'.format(date_filter))
+
+        filter_type, filter_start, filter_end = date_filter_pieces
+        date_filter_object.Add(
+            filter_type=filter_type.strip(), filter_start=filter_start.strip(),
+            filter_end=filter_end.strip())
+
+      # TODO: Move the date filter to the front-end as an attribute.
+      FileSaver.SetDateFilter(date_filter_object)
+
+  def ProcessSource(self, options):
+    """Processes the source.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+
+    Raises:
+      SourceScannerError: if the source scanner could not find a supported
+                          file system.
+      UserAbort: if the user initiated an abort.
+    """
+    self.ScanSource(options)
+
+    filter_file = getattr(options, 'filter', None)
+    if filter_file:
+      self._ExtractWithFilter(filter_file, options.path)
+
+    extension_string = getattr(options, 'extension_string', None)
+    if extension_string:
+      extensions = [x.strip() for x in extension_string.split(',')]
+
+      self._ExtractWithExtensions(extensions, options.path)
+      logging.info(u'Files based on extension extracted.')
+
+
+def Main():
+  """The main function, running the show."""
+  front_end = ImageExportFrontend()
+
+  arg_parser = argparse.ArgumentParser(
+      description=(
+          'This is a simple collector designed to export files inside an '
+          'image, both within a regular RAW image as well as inside a VSS. '
+          'The tool uses a collection filter that uses the same syntax as a '
+          'targeted plaso filter.'),
+      epilog='And that\'s how you export files, plaso style.')
+
+  arg_parser.add_argument(
+      '-d', '--debug', dest='debug', action='store_true', default=False,
+      help='Turn on debugging information.')
+
+  arg_parser.add_argument(
+      '-w', '--write', dest='path', action='store', default='.', type=str,
+      help='The directory in which extracted files should be stored.')
+
+  arg_parser.add_argument(
+      '-x', '--extensions', dest='extension_string', action='store',
+      type=str, metavar='EXTENSION_STRING', help=(
+          'If the purpose is to find all files given a certain extension '
+          'this option should be used. 
This option accepts a comma separated '
+          'string denoting all file extensions, eg: -x "csv,docx,pst".'))
+
+  arg_parser.add_argument(
+      '-f', '--filter', action='store', dest='filter', metavar='FILTER_FILE',
+      type=str, help=(
+          'Full path to the file that contains the collection filter, '
+          'the file can use variables that are defined in preprocessing, '
+          'just like any other log2timeline/plaso collection filter.'))
+
+  arg_parser.add_argument(
+      '--date-filter', '--date_filter', action='append', type=str,
+      dest='date_filters', metavar='TYPE_START_END', default=None, help=(
+          'Add a date-based filter to the export criteria. If a date-based '
+          'filter is set, no file is saved unless it\'s within the date '
+          'boundary. This parameter should be in the form of "TYPE,START,END" '
+          'where TYPE defines which timestamp this date filter affects, eg: '
+          'atime, ctime, crtime, bkup, etc. START defines the start date and '
+          'time of the boundary and END defines the end time. Both timestamps '
+          'are optional and should be set as - if not needed. The correct form '
+          'of the timestamp value is "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD". '
+          'An example is "atime, 2013-01-01 23:12:14, 2013-02-23". This '
+          'parameter can be repeated as needed to add additional date '
+          'boundaries, eg: once for atime, once for crtime, etc.'))
+
+  arg_parser.add_argument(
+      '--include_duplicates', dest='include_duplicates', action='store_true',
+      default=False, help=(
+          'By default, if VSS is turned on, all files saved will have their '
+          'MD5 sum calculated and compared to other files already saved '
+          'with the same inode value. If the MD5 sum is the same, the file '
+          'does not get saved again. This option turns off that behavior '
+          'so that all files will get stored, even if they are duplicates.'))
+
+  front_end.AddImageOptions(arg_parser)
+  front_end.AddVssProcessingOptions(arg_parser)
+
+  arg_parser.add_argument(
+      'image', action='store', metavar='IMAGE', default=None, type=str, help=(
+          'The full path to the image file that we are about to extract files '
+          'from; it should be a raw image or another image type that plaso '
+          'supports.'))
+
+  options = arg_parser.parse_args()
+
+  format_str = u'%(asctime)s [%(levelname)s] %(message)s'
+  if options.debug:
+    logging.basicConfig(level=logging.DEBUG, format=format_str)
+  else:
+    logging.basicConfig(level=logging.INFO, format=format_str)
+
+  try:
+    front_end.ParseOptions(options, source_option='image')
+  except errors.BadConfigOption as exception:
+    arg_parser.print_help()
+    print u''
+    logging.error(u'{0:s}'.format(exception))
+    return False
+
+  try:
+    front_end.ProcessSource(options)
+    logging.info(u'Processing completed.')
+
+  except (KeyboardInterrupt, errors.UserAbort):
+    logging.warning(u'Aborted by user.')
+    return False
+
+  except errors.SourceScannerError as exception:
+    logging.warning((
+        u'Unable to scan for a supported filesystem with error: {0:s}\n'
+        u'Most likely the image format is not supported by the '
+        u'tool.').format(exception))
+    return False
+
+  return True
+
+
+if __name__ == '__main__':
+  if not Main():
+    sys.exit(1)
+  else:
+    sys.exit(0)
diff --git a/plaso/frontend/image_export_test.py b/plaso/frontend/image_export_test.py
new file mode 100644
index 0000000..a95bdcb
--- /dev/null
+++ b/plaso/frontend/image_export_test.py
@@ -0,0 +1,237 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the image export front-end.""" + +import glob +import os +import shutil +import tempfile +import unittest + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.frontend import image_export +from plaso.frontend import test_lib +from plaso.lib import errors + + +class Log2TimelineFrontendTest(test_lib.FrontendTestCase): + """Tests for the image export front-end.""" + + def setUp(self): + """Sets up the objects used throughout the test.""" + self._temp_directory = tempfile.mkdtemp() + + def tearDown(self): + """Cleans up the objects used throughout the test.""" + shutil.rmtree(self._temp_directory, True) + + def testProcessSourceExtractWithDateFilter(self): + """Tests extract with file filter and date filter functionality.""" + test_front_end = image_export.ImageExportFrontend() + + options = test_lib.Options() + options.image = self._GetTestFilePath([u'image.qcow2']) + options.path = self._temp_directory + options.include_duplicates = True + + options.filter = os.path.join(self._temp_directory, u'filter.txt') + with open(options.filter, 'wb') as file_object: + file_object.write('/a_directory/.+_file\n') + + test_front_end.ParseOptions(options, source_option='image') + + # Set the date filter. + filter_start = '2012-05-25 15:59:00' + filter_end = '2012-05-25 15:59:20' + + date_filter_object = image_export.DateFilter() + date_filter_object.Add( + filter_start=filter_start, filter_end=filter_end, + filter_type='ctime') + image_export.FileSaver.SetDateFilter(date_filter_object) + + test_front_end.ProcessSource(options) + + expected_text_files = sorted([ + os.path.join(self._temp_directory, u'a_directory', u'a_file')]) + + text_files = glob.glob(os.path.join( + self._temp_directory, u'a_directory', u'*')) + + self.assertEquals(sorted(text_files), expected_text_files) + + # We need to reset the date filter to not affect other tests. + # pylint: disable=protected-access + # TODO: Remove this once filtering has been moved to the front end object. 
+    image_export.FileSaver._date_filter = None
+
+  def testProcessSourceExtractWithExtensions(self):
+    """Tests the extract with extensions process source functionality."""
+    test_front_end = image_export.ImageExportFrontend()
+
+    options = test_lib.Options()
+    options.image = self._GetTestFilePath([u'image.qcow2'])
+    options.path = self._temp_directory
+    options.extension_string = u'txt'
+
+    test_front_end.ParseOptions(options, source_option='image')
+
+    test_front_end.ProcessSource(options)
+
+    expected_text_files = sorted([
+        os.path.join(self._temp_directory, u'passwords.txt')])
+
+    text_files = glob.glob(os.path.join(self._temp_directory, u'*'))
+
+    self.assertEquals(sorted(text_files), expected_text_files)
+
+  def testProcessSourceExtractWithFilter(self):
+    """Tests the extract with filter process source functionality."""
+    test_front_end = image_export.ImageExportFrontend()
+
+    options = test_lib.Options()
+    options.image = self._GetTestFilePath([u'image.qcow2'])
+    options.path = self._temp_directory
+
+    options.filter = os.path.join(self._temp_directory, u'filter.txt')
+    with open(options.filter, 'wb') as file_object:
+      file_object.write('/a_directory/.+_file\n')
+
+    test_front_end.ParseOptions(options, source_option='image')
+
+    test_front_end.ProcessSource(options)
+
+    expected_text_files = sorted([
+        os.path.join(self._temp_directory, u'a_directory', u'another_file'),
+        os.path.join(self._temp_directory, u'a_directory', u'a_file')])
+
+    text_files = glob.glob(os.path.join(
+        self._temp_directory, u'a_directory', u'*'))
+
+    self.assertEquals(sorted(text_files), expected_text_files)
+
+  def testDateFilter(self):
+    """Tests saving files based on the date filter."""
+    # Open up a file entry.
+    path = self._GetTestFilePath([u'ímynd.dd'])
+    os_path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_OS, location=path)
+    tsk_path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_TSK, inode=16,
+        location=u'/a_directory/another_file', parent=os_path_spec)
+
+    file_entry = path_spec_resolver.Resolver.OpenFileEntry(tsk_path_spec)
+
+    # Timestamps of the file:
+    #   Modified: 2012-05-25T15:59:23+00:00
+    #   Accessed: 2012-05-25T15:59:23+00:00
+    #   Created: 2012-05-25T15:59:23+00:00
+
+    # Create the date filter object.
+    date_filter = image_export.DateFilter()
+
+    # No date filter set.
+    self.assertTrue(
+        date_filter.CompareFileEntry(file_entry))
+
+    # Add a date to the date filter.
+    date_filter.Add(
+        filter_start='2012-05-25 15:59:20', filter_end='2012-05-25 15:59:25',
+        filter_type='ctime')
+
+    self.assertTrue(date_filter.CompareFileEntry(file_entry))
+    date_filter.Reset()
+
+    date_filter.Add(
+        filter_start='2012-05-25 15:59:24', filter_end='2012-05-25 15:59:55',
+        filter_type='ctime')
+    self.assertFalse(date_filter.CompareFileEntry(file_entry))
+    date_filter.Reset()
+
+    # Test a timestamp that does not exist in the stat object.
+    date_filter.Add(filter_type='bkup', filter_start='2012-02-02 12:12:12')
+    with self.assertRaises(errors.WrongFilterOption):
+      date_filter.CompareFileEntry(file_entry)
+
+    # Test adding a badly formatted filter.
+    with self.assertRaises(errors.WrongFilterOption):
+      date_filter.Add(filter_type='foobar', filter_start='2012-02-01 01:01:01')
+    date_filter.Reset()
+
+    # Test adding a badly formatted filter, no date set.
+    with self.assertRaises(errors.WrongFilterOption):
+      date_filter.Add(filter_type='atime')
+    date_filter.Reset()
+
+    # Just the end date set.
+ date_filter.Add( + filter_end='2012-05-25 15:59:55', filter_type='mtime') + self.assertTrue(date_filter.CompareFileEntry(file_entry)) + date_filter.Reset() + + # Just with a start date but within range. + date_filter.Add( + filter_start='2012-03-25 15:59:55', filter_type='atime') + self.assertTrue(date_filter.CompareFileEntry(file_entry)) + date_filter.Reset() + + # And now with a start date, but out of range. + date_filter.Add( + filter_start='2012-05-25 15:59:55', filter_type='ctime') + self.assertFalse(date_filter.CompareFileEntry(file_entry)) + date_filter.Reset() + + # Test with more than one date filter. + date_filter.Add( + filter_start='2012-05-25 15:59:55', filter_type='ctime', + filter_end='2012-05-25 17:34:12') + date_filter.Add( + filter_start='2012-05-25 15:59:20', filter_end='2012-05-25 15:59:25', + filter_type='atime') + date_filter.Add( + filter_start='2012-05-25 15:59:24', filter_end='2012-05-25 15:59:55', + filter_type='mtime') + self.assertFalse(date_filter.CompareFileEntry(file_entry)) + self.assertEquals(date_filter.number_of_filters, 3) + # Remove a filter. + date_filter.Remove( + filter_start='2012-05-25 15:59:55', filter_type='ctime', + filter_end='2012-05-25 17:34:12') + self.assertEquals(date_filter.number_of_filters, 2) + + # Remove a date filter that does not exist. + date_filter.Remove( + filter_start='2012-05-25 11:59:55', filter_type='ctime', + filter_end='2012-05-25 17:34:12') + self.assertEquals(date_filter.number_of_filters, 2) + + date_filter.Add( + filter_end='2012-05-25 15:59:25', filter_type='atime') + self.assertEquals(date_filter.number_of_filters, 3) + date_filter.Remove( + filter_end='2012-05-25 15:59:25', filter_type='atime') + self.assertEquals(date_filter.number_of_filters, 2) + + date_filter.Reset() + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/frontend/log2timeline.py b/plaso/frontend/log2timeline.py new file mode 100755 index 0000000..b65e661 --- /dev/null +++ b/plaso/frontend/log2timeline.py @@ -0,0 +1,454 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The log2timeline front-end.""" + +import argparse +import logging +import multiprocessing +import sys +import time +import textwrap + +import plaso + +# Registering output modules so that output bypass works. +from plaso import output as _ # pylint: disable=unused-import +from plaso.frontend import frontend +from plaso.frontend import utils as frontend_utils +from plaso.lib import errors +from plaso.parsers import manager as parsers_manager + +import pytz + + +class LoggingFilter(logging.Filter): + """Class that implements basic filtering of log events for plaso. + + Some libraries, like binplist, introduce excessive amounts of + logging that clutters down the debug logs of plaso, making them + almost non-usable. This class implements a filter designed to make + the debug logs more clutter-free. 
+ """ + + def filter(self, record): + """Filter messages sent to the logging infrastructure.""" + if record.module == 'binplist' and record.levelno == logging.DEBUG: + return False + + return True + + +class Log2TimelineFrontend(frontend.ExtractionFrontend): + """Class that implements the log2timeline front-end.""" + + _BYTES_IN_A_MIB = 1024 * 1024 + + def __init__(self): + """Initializes the front-end object.""" + input_reader = frontend.StdinFrontendInputReader() + output_writer = frontend.StdoutFrontendOutputWriter() + + super(Log2TimelineFrontend, self).__init__(input_reader, output_writer) + + def _GetPluginData(self): + """Return a dict object with a list of all available parsers and plugins.""" + return_dict = {} + + # Import all plugins and parsers to print out the necessary information. + # This is not import at top since this is only required if this parameter + # is set, otherwise these libraries get imported in their respected + # locations. + + # The reason why some of these libraries are imported as '_' is to make sure + # all appropriate parsers and plugins are registered, yet we don't need to + # directly call these libraries, it is enough to load them up to get them + # registered. + + # TODO: remove this hack includes should be a the top if this does not work + # remove the need for implicit behavior on import. + from plaso import filters + from plaso import parsers as _ + from plaso import output as _ + from plaso.frontend import presets + from plaso.lib import output + + return_dict['Versions'] = [ + ('plaso engine', plaso.GetVersion()), + ('python', sys.version)] + + return_dict['Parsers'] = [] + for _, parser_class in parsers_manager.ParsersManager.GetParsers(): + description = getattr(parser_class, 'DESCRIPTION', u'') + return_dict['Parsers'].append((parser_class.NAME, description)) + + return_dict['Parser Lists'] = [] + for category, parsers in sorted(presets.categories.items()): + return_dict['Parser Lists'].append((category, ', '.join(parsers))) + + return_dict['Output Modules'] = [] + for name, description in sorted(output.ListOutputFormatters()): + return_dict['Output Modules'].append((name, description)) + + return_dict['Plugins'] = [] + for _, parser_class in parsers_manager.ParsersManager.GetParsers(): + if parser_class.SupportsPlugins(): + for _, plugin_class in parser_class.GetPlugins(): + description = getattr(plugin_class, 'DESCRIPTION', u'') + return_dict['Plugins'].append((plugin_class.NAME, description)) + + return_dict['Filters'] = [] + for filter_obj in sorted(filters.ListFilters()): + doc_string, _, _ = filter_obj.__doc__.partition('\n') + return_dict['Filters'].append((filter_obj.filter_name, doc_string)) + + return return_dict + + def _GetTimeZones(self): + """Returns a generator of the names of all the supported time zones.""" + yield 'local' + for zone in pytz.all_timezones: + yield zone + + def ListPluginInformation(self): + """Lists all plugin and parser information.""" + plugin_list = self._GetPluginData() + return_string_pieces = [] + + return_string_pieces.append( + u'{:=^80}'.format(u' log2timeline/plaso information. ')) + + for header, data in plugin_list.items(): + # TODO: Using the frontend utils here instead of "self.PrintHeader" + # since the desired output here is a string that can be sent later + # to an output writer. Change this entire function so it can utilize + # PrintHeader or something similar. 
+ return_string_pieces.append(frontend_utils.FormatHeader(header))
+ for entry_header, entry_data in sorted(data):
+ return_string_pieces.append(
+ frontend_utils.FormatOutputString(entry_header, entry_data))
+
+ return_string_pieces.append(u'')
+ self._output_writer.Write(u'\n'.join(return_string_pieces))
+
+ def ListTimeZones(self):
+ """Lists the time zones."""
+ self._output_writer.Write(u'=' * 40)
+ self._output_writer.Write(u' ZONES')
+ self._output_writer.Write(u'-' * 40)
+ for timezone in self._GetTimeZones():
+ self._output_writer.Write(u' {0:s}'.format(timezone))
+ self._output_writer.Write(u'=' * 40)
+
+
+def Main():
+ """Start the tool."""
+ multiprocessing.freeze_support()
+
+ front_end = Log2TimelineFrontend()
+
+ epilog = u'\n'.join([
+ u'',
+ u'Example usage:',
+ u'',
+ u'Run the tool against an image (full kitchen sink)',
+ u' log2timeline.py /cases/mycase/plaso.dump ímynd.dd',
+ u'',
+ u'Instead of answering questions, indicate some of the options on the',
+ u'command line (including data from particular VSS stores).',
+ (u' log2timeline.py -o 63 --vss_stores 1,2 /cases/plaso_vss.dump '
+ u'image.E01'),
+ u'',
+ u'And that\'s how you build a timeline using log2timeline...',
+ u''])
+
+ description = u'\n'.join([
+ u'',
+ u'log2timeline is the main front-end to the plaso back-end, used to',
+ u'collect and correlate events extracted from a filesystem.',
+ u'',
+ u'More information can be gathered from here:',
+ u' http://plaso.kiddaland.net/usage/log2timeline',
+ u''])
+
+ arg_parser = argparse.ArgumentParser(
+ description=textwrap.dedent(description),
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=textwrap.dedent(epilog), add_help=False)
+
+ # Create a few argument groups to make formatting help messages clearer.
+ info_group = arg_parser.add_argument_group('Informational Arguments')
+ function_group = arg_parser.add_argument_group('Functional Arguments')
+ deep_group = arg_parser.add_argument_group('Deep Analysis Arguments')
+ performance_group = arg_parser.add_argument_group('Performance Arguments')
+
+ function_group.add_argument(
+ '-z', '--zone', '--timezone', dest='timezone', action='store', type=str,
+ default='UTC', help=(
+ u'Define the timezone of the IMAGE (not the output). This is usually '
+ u'discovered automatically by preprocessing but might need to be '
+ u'specifically set if preprocessing does not properly detect it, '
+ u'or to overwrite the detected time zone.'))
+
+ function_group.add_argument(
+ '-t', '--text', dest='text_prepend', action='store', type=unicode,
+ default=u'', metavar='TEXT', help=(
+ u'Define a free form text string that is prepended to each path '
+ u'to make it easier to distinguish one record from another in a '
+ u'timeline (like c:\\, or host_w_c:\\)'))
+
+ function_group.add_argument(
+ '--parsers', dest='parsers', type=str, action='store', default='',
+ metavar='PARSER_LIST', help=(
+ u'Define a list of parsers for the tool to use. This is a comma '
+ u'separated list where each entry can be either a name of a parser '
+ u'or a parser list. Each entry can be prepended with a minus sign '
+ u'to negate the selection (exclude it). The list match is an '
+ u'exact match while an individual parser matching is a case '
+ u'insensitive substring match, with support for glob patterns. '
+ u'Examples would be: "reg" that matches the substring "reg" in '
+ u'all parser names or the glob pattern "sky[pd]" that would match '
+ u'all parsers that have the string "skyp" or "skyd" in its name. '
+ u'All matching is case insensitive.'))
+
+ info_group.add_argument(
+ '-h', '--help', action='help', help=u'Show this help message and exit.')
+
+ info_group.add_argument(
+ '--logfile', action='store', metavar='FILENAME', dest='logfile',
+ type=unicode, default=u'', help=(
+ u'If defined all log messages will be redirected to this file '
+ u'instead of the default STDERR.'))
+
+ function_group.add_argument(
+ '-p', '--preprocess', dest='preprocess', action='store_true',
+ default=False, help=(
+ u'Turn on preprocessing. Preprocessing is turned on by default '
+ u'when parsing image files, however if a mount point is being '
+ u'parsed then this parameter needs to be set manually.'))
+
+ front_end.AddPerformanceOptions(performance_group)
+
+ performance_group.add_argument(
+ '--workers', dest='workers', action='store', type=int, default=0,
+ help=(u'The number of worker threads [defaults to available system '
+ u'CPUs minus three].'))
+
+ # TODO: seems to be no longer used, remove.
+ # function_group.add_argument(
+ # '-i', '--image', dest='image', action='store_true', default=False,
+ # help=(
+ # 'Indicates that this is an image instead of a regular file. It is '
+ # 'not necessary to include this option if -o (offset) is used, then '
+ # 'this option is assumed. Use this when parsing an image with an '
+ # 'offset of zero.'))
+
+ front_end.AddVssProcessingOptions(deep_group)
+
+ performance_group.add_argument(
+ '--single_thread', '--single-thread', '--single_process',
+ '--single-process', dest='single_process', action='store_true',
+ default=False, help=(
+ u'Indicate that the tool should run in a single process.'))
+
+ function_group.add_argument(
+ '-f', '--file_filter', '--file-filter', dest='file_filter',
+ action='store', type=unicode, default=None, help=(
+ u'List of files to include for targeted collection of files to '
+ u'parse, one line per file path, setup is /path|file - where each '
+ u'element can contain either a variable set in the preprocessing '
+ u'stage or a regular expression.'))
+
+ deep_group.add_argument(
+ '--scan_archives', dest='open_files', action='store_true', default=False,
+ help=argparse.SUPPRESS)
+
+ # This option is "hidden" for the time being, still left in there for testing
+ # purposes, but hidden from the tool usage and help messages.
+ # help=('Indicate that the tool should try to open files to extract embedd'
+ # 'ed files within them, for instance to extract files from compress'
+ # 'ed containers, etc. Be AWARE THAT THIS IS EXTREMELY SLOW.'))
+
+ front_end.AddImageOptions(function_group)
+
+ function_group.add_argument(
+ '--partition', dest='partition_number', action='store', type=int,
+ default=None, help=(
+ u'Choose a partition number from a disk image. This partition '
+ u'number should correspond to the partition number on the disk '
+ u'image, starting from partition 1.'))
+
+ # Build the version information.
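+ # The version string is built once when the argument parser is set up,
+ # producing output along the lines of "log2timeline - plaso back-end
+ # 1.1.0" (version number illustrative; plaso.GetVersion() supplies the
+ # real value).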
+ version_string = u'log2timeline - plaso back-end {0:s}'.format(
+ plaso.GetVersion())
+
+ info_group.add_argument(
+ '-v', '--version', action='version', version=version_string,
+ help=u'Show the current version of the back-end.')
+
+ info_group.add_argument(
+ '--info', dest='show_info', action='store_true', default=False,
+ help=u'Print out information about supported plugins and parsers.')
+
+ info_group.add_argument(
+ '--show_memory_usage', '--show-memory-usage', action='store_true',
+ default=False, dest='foreman_verbose', help=(
+ u'Indicates that basic memory usage should be included in the '
+ u'output of the process monitor. If this option is not set the '
+ u'tool only displays basic status and counter information.'))
+
+ info_group.add_argument(
+ '--disable_worker_monitor', '--disable-worker-monitor',
+ action='store_false', default=True, dest='foreman_enabled', help=(
+ u'Turn off the foreman. The foreman monitors all worker processes '
+ u'and periodically prints out information about all running workers. '
+ u'By default the foreman is run, but it can be turned off using this '
+ u'parameter.'))
+
+ front_end.AddExtractionOptions(function_group)
+
+ function_group.add_argument(
+ '--output', dest='output_module', action='store', type=unicode,
+ default='', help=(
+ u'Bypass the storage module directly storing events according to '
+ u'the output module. This means that the output will not be in the '
+ u'pstorage format but in the format chosen by the output module. '
+ u'[Please note this feature is EXPERIMENTAL at this time, use at '
+ u'your own risk (e.g. sqlite output does not yet work)]'))
+
+ function_group.add_argument(
+ '--serializer-format', '--serializer_format', dest='serializer_format',
+ action='store', default='proto', metavar='FORMAT', help=(
+ u'By default the storage uses protobufs for serializing event '
+ u'objects. This parameter can be used to change that behavior. '
+ u'The choices are "proto" and "json".'))
+
+ front_end.AddInformationalOptions(info_group)
+
+ arg_parser.add_argument(
+ 'output', action='store', metavar='STORAGE_FILE', nargs='?',
+ type=unicode, help=(
+ u'The path to the output file; if the file exists it will get '
+ u'appended to.'))
+
+ arg_parser.add_argument(
+ 'source', action='store', metavar='SOURCE',
+ nargs='?', type=unicode, help=(
+ u'The path to the source device, file or directory. If the source is '
+ u'a supported storage media device or image file, archive file or '
+ u'a directory, the files within are processed recursively.'))
+
+ arg_parser.add_argument(
+ 'filter', action='store', metavar='FILTER', nargs='?', default=None,
+ type=unicode, help=(
+ u'A filter that can be used to filter the dataset before it '
+ u'is written into storage. More information about the filters '
+ u'and their usage can be found here: http://plaso.kiddaland.'
+ u'net/usage/filters'))
+
+ # Properly prepare the attributes according to local encoding.
+ if front_end.preferred_encoding == 'ascii':
+ logging.warning(
+ u'The preferred encoding of your system is ASCII, which is not optimal '
+ u'for the typically non-ASCII characters that need to be parsed and '
+ u'processed. The tool will most likely crash and die, perhaps in a way '
+ u'that may not be recoverable. 
A five second delay is introduced to ' + u'give you time to cancel the runtime and reconfigure your preferred ' + u'encoding, otherwise continue at own risk.') + time.sleep(5) + + u_argv = [x.decode(front_end.preferred_encoding) for x in sys.argv] + sys.argv = u_argv + try: + options = arg_parser.parse_args() + except UnicodeEncodeError: + # If we get here we are attempting to print help in a "dumb" terminal. + print arg_parser.format_help().encode(front_end.preferred_encoding) + return False + + if options.timezone == 'list': + front_end.ListTimeZones() + return True + + if options.show_info: + front_end.ListPluginInformation() + return True + + format_str = ( + u'%(asctime)s [%(levelname)s] (%(processName)-10s) PID:%(process)d ' + u'<%(module)s> %(message)s') + + if options.debug: + if options.logfile: + logging.basicConfig( + level=logging.DEBUG, format=format_str, filename=options.logfile) + else: + logging.basicConfig(level=logging.DEBUG, format=format_str) + + logging_filter = LoggingFilter() + root_logger = logging.getLogger() + root_logger.addFilter(logging_filter) + elif options.logfile: + logging.basicConfig( + level=logging.INFO, format=format_str, filename=options.logfile) + else: + logging.basicConfig(level=logging.INFO, format=format_str) + + if not options.output: + arg_parser.print_help() + print u'' + arg_parser.print_usage() + print u'' + logging.error(u'Wrong usage: need to define an output.') + return False + + try: + front_end.ParseOptions(options) + front_end.SetStorageFile(options.output) + except errors.BadConfigOption as exception: + arg_parser.print_help() + print u'' + logging.error(u'{0:s}'.format(exception)) + return False + + # Configure the foreman (monitors workers). + front_end.SetShowMemoryInformation(show_memory=options.foreman_verbose) + front_end.SetRunForeman(run_foreman=options.foreman_enabled) + + try: + front_end.ProcessSource(options) + logging.info(u'Processing completed.') + + except (KeyboardInterrupt, errors.UserAbort): + logging.warning(u'Aborted by user.') + return False + + except errors.SourceScannerError as exception: + logging.warning(( + u'Unable to scan for a supported filesystem with error: {0:s}\n' + u'Most likely the image format is not supported by the ' + u'tool.').format(exception)) + return False + + return True + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/plaso/frontend/log2timeline_test.py b/plaso/frontend/log2timeline_test.py new file mode 100644 index 0000000..ab7dabe --- /dev/null +++ b/plaso/frontend/log2timeline_test.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
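+#
+# This test module is directly executable, for example:
+# python plaso/frontend/log2timeline_test.py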
+"""Tests for the log2timeline front-end.""" + +import os +import shutil +import tempfile +import unittest + +from plaso.frontend import log2timeline +from plaso.frontend import test_lib +from plaso.lib import pfilter +from plaso.lib import storage + + +class Log2TimelineFrontendTest(test_lib.FrontendTestCase): + """Tests for the log2timeline front-end.""" + + def setUp(self): + """Sets up the objects used throughout the test.""" + # This is necessary since TimeRangeCache uses class members. + # TODO: remove this work around and properly fix TimeRangeCache. + pfilter.TimeRangeCache.ResetTimeConstraints() + + self._temp_directory = tempfile.mkdtemp() + + def tearDown(self): + """Cleans up the objects used throughout the test.""" + shutil.rmtree(self._temp_directory, True) + + def testGetStorageInformation(self): + """Tests the get storage information function.""" + test_front_end = log2timeline.Log2TimelineFrontend() + + options = test_lib.Options() + options.source = self._GetTestFilePath([u'ímynd.dd']) + + storage_file_path = os.path.join(self._temp_directory, u'plaso.db') + + test_front_end.ParseOptions(options) + test_front_end.SetStorageFile(storage_file_path=storage_file_path) + test_front_end.SetRunForeman(run_foreman=False) + + test_front_end.ProcessSource(options) + + try: + storage_file = storage.StorageFile(storage_file_path, read_only=True) + except IOError: + # This is not a storage file, we should fail. + self.assertTrue(False) + + # Make sure we can read an event out of the storage. + event_object = storage_file.GetSortedEntry() + self.assertIsNotNone(event_object) + + # TODO: add more tests that cover more of the functionality of the frontend. + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/frontend/pinfo.py b/plaso/frontend/pinfo.py new file mode 100755 index 0000000..66f5bf5 --- /dev/null +++ b/plaso/frontend/pinfo.py @@ -0,0 +1,266 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple dump information gathered from a plaso storage container. + +pinfo stands for Plaso INniheldurFleiriOrd or plaso contains more words. +""" +# TODO: To make YAML loading work. + +import argparse +import logging +import pprint +import sys + +from plaso.frontend import frontend +from plaso.lib import errors +from plaso.lib import timelib + + +class PinfoFrontend(frontend.AnalysisFrontend): + """Class that implements the pinfo front-end.""" + + def __init__(self): + """Initializes the front-end object.""" + input_reader = frontend.StdinFrontendInputReader() + output_writer = frontend.StdoutFrontendOutputWriter() + + super(PinfoFrontend, self).__init__(input_reader, output_writer) + + self._printer = pprint.PrettyPrinter(indent=8) + self._verbose = False + + def _AddCollectionInformation(self, lines_of_text, collection_information): + """Adds the lines of text that make up the collection information. 
+ + Args: + lines_of_text: A list containing the lines of text. + collection_information: The collection information dict. + """ + filename = collection_information.get('file_processed', 'N/A') + time_of_run = collection_information.get('time_of_run', 0) + time_of_run = timelib.Timestamp.CopyToIsoFormat(time_of_run) + + lines_of_text.append(u'Storage file:\t\t{0:s}'.format( + self._storage_file_path)) + lines_of_text.append(u'Source processed:\t{0:s}'.format(filename)) + lines_of_text.append(u'Time of processing:\t{0:s}'.format(time_of_run)) + + lines_of_text.append(u'') + lines_of_text.append(u'Collection information:') + + for key, value in collection_information.items(): + if key not in ['file_processed', 'time_of_run']: + lines_of_text.append(u'\t{0:s} = {1!s}'.format(key, value)) + + def _AddCounterInformation( + self, lines_of_text, description, counter_information): + """Adds the lines of text that make up the counter information. + + Args: + lines_of_text: A list containing the lines of text. + description: The counter information description. + counter_information: The counter information dict. + """ + lines_of_text.append(u'') + lines_of_text.append(u'{0:s}:'.format(description)) + + for key, value in counter_information.most_common(): + lines_of_text.append(u'\tCounter: {0:s} = {1:d}'.format(key, value)) + + def _AddHeader(self, lines_of_text): + """Adds the lines of text that make up the header. + + Args: + lines_of_text: A list containing the lines of text. + """ + lines_of_text.append(u'-' * self._LINE_LENGTH) + lines_of_text.append(u'\t\tPlaso Storage Information') + lines_of_text.append(u'-' * self._LINE_LENGTH) + + def _AddStoreInformation(self, lines_of_text, store_information): + """Adds the lines of text that make up the store information. + + Args: + lines_of_text: A list containing the lines of text. + store_information: The store information dict. + """ + lines_of_text.append(u'') + lines_of_text.append(u'Store information:') + lines_of_text.append(u'\tNumber of available stores: {0:d}'.format( + store_information['Number'])) + + if not self._verbose: + lines_of_text.append( + u'\tStore information details omitted (to see use: --verbose)') + else: + for key, value in store_information.iteritems(): + if key not in ['Number']: + lines_of_text.append( + u'\t{0:s} =\n{1!s}'.format(key, self._printer.pformat(value))) + + def _FormatStorageInformation(self, info, storage_file, last_entry=False): + """Formats the storage information. + + Args: + info: The storage information object (instance of PreprocessObject). + storage_file: The storage file (instance of StorageFile). + last_entry: Optional boolean value to indicate this is the last + information entry. The default is False. + + Returns: + A string containing the formatted storage information. 
+ """ + lines_of_text = [] + + collection_information = getattr(info, 'collection_information', None) + if collection_information: + self._AddHeader(lines_of_text) + self._AddCollectionInformation(lines_of_text, collection_information) + else: + lines_of_text.append(u'Missing collection information.') + + counter_information = getattr(info, 'counter', None) + if counter_information: + self._AddCounterInformation( + lines_of_text, u'Parser counter information', counter_information) + + counter_information = getattr(info, 'plugin_counter', None) + if counter_information: + self._AddCounterInformation( + lines_of_text, u'Plugin counter information', counter_information) + + store_information = getattr(info, 'stores', None) + if store_information: + self._AddStoreInformation(lines_of_text, store_information) + + information = u'\n'.join(lines_of_text) + + if not self._verbose: + preprocessing = ( + u'Preprocessing information omitted (to see use: --verbose).') + else: + preprocessing = u'Preprocessing information:\n' + for key, value in info.__dict__.items(): + if key == 'collection_information': + continue + elif key == 'counter' or key == 'stores': + continue + if isinstance(value, list): + preprocessing += u'\t{0:s} =\n{1!s}\n'.format( + key, self._printer.pformat(value)) + else: + preprocessing += u'\t{0:s} = {1!s}\n'.format(key, value) + + if not last_entry: + reports = u'' + elif storage_file.HasReports(): + reports = u'Reporting information omitted (to see use: --verbose).' + else: + reports = u'No reports stored.' + + if self._verbose and last_entry and storage_file.HasReports(): + report_list = [] + for report in storage_file.GetReports(): + report_list.append(report.GetString()) + reports = u'\n'.join(report_list) + + return u'\n'.join([ + information, u'', preprocessing, u'', reports, u'-+' * 40]) + + def GetStorageInformation(self): + """Returns a formatted storage information generator.""" + try: + storage_file = self.OpenStorageFile() + except IOError as exception: + logging.error( + u'Unable to open storage file: {0:s} with error: {1:s}'.format( + self._storage_file_path, exception)) + return + + list_of_storage_information = storage_file.GetStorageInformation() + if not list_of_storage_information: + yield '' + return + + last_entry = False + + for index, info in enumerate(list_of_storage_information): + if index + 1 == len(list_of_storage_information): + last_entry = True + yield self._FormatStorageInformation( + info, storage_file, last_entry=last_entry) + + def ParseOptions(self, options): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Raises: + BadConfigOption: if the options are invalid. + """ + super(PinfoFrontend, self).ParseOptions(options) + + self._verbose = getattr(options, 'verbose', False) + + +def Main(): + """Start the tool.""" + front_end = PinfoFrontend() + + usage = """ +Gives you information about the storage file, how it was +collected, what information was gained from the image, etc. 
+ """ + arg_parser = argparse.ArgumentParser(description=usage) + + format_str = '[%(levelname)s] %(message)s' + logging.basicConfig(level=logging.INFO, format=format_str) + + arg_parser.add_argument( + '-v', '--verbose', dest='verbose', action='store_true', default=False, + help='Be extra verbose in the information printed out.') + + front_end.AddStorageFileOptions(arg_parser) + + options = arg_parser.parse_args() + + try: + front_end.ParseOptions(options) + except errors.BadConfigOption as exception: + arg_parser.print_help() + print u'' + logging.error(u'{0:s}'.format(exception)) + return False + + storage_information_found = False + for storage_information in front_end.GetStorageInformation(): + storage_information_found = True + print storage_information.encode(front_end.preferred_encoding) + + if not storage_information_found: + print u'No Plaso storage information found.' + + return True + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/plaso/frontend/pinfo_test.py b/plaso/frontend/pinfo_test.py new file mode 100644 index 0000000..e7ed674 --- /dev/null +++ b/plaso/frontend/pinfo_test.py @@ -0,0 +1,65 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for test pinfo front-end.""" + +import os +import unittest + +from plaso.frontend import pinfo +from plaso.frontend import test_lib + + +class PinfoFrontendTest(test_lib.FrontendTestCase): + """Tests for test pinfo front-end.""" + + def testGetStorageInformation(self): + """Tests the get storage information function.""" + test_front_end = pinfo.PinfoFrontend() + + options = test_lib.Options() + options.storage_file = os.path.join(self._TEST_DATA_PATH, 'psort_test.out') + + test_front_end.ParseOptions(options) + + storage_information_list = list(test_front_end.GetStorageInformation()) + + self.assertEquals(len(storage_information_list), 1) + + lines_of_text = storage_information_list[0].split(u'\n') + + expected_line_of_text = u'-' * 80 + self.assertEquals(lines_of_text[0], expected_line_of_text) + self.assertEquals(lines_of_text[2], expected_line_of_text) + + self.assertEquals(lines_of_text[1], u'\t\tPlaso Storage Information') + + expected_line_of_text = u'Storage file:\t\t{0:s}'.format( + options.storage_file) + self.assertEquals(lines_of_text[3], expected_line_of_text) + + self.assertEquals(lines_of_text[4], u'Source processed:\tsyslog') + + expected_line_of_text = u'Time of processing:\t2014-02-15T04:33:16+00:00' + self.assertEquals(lines_of_text[5], expected_line_of_text) + + self.assertEquals(lines_of_text[6], u'') + self.assertEquals(lines_of_text[7], u'Collection information:') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/frontend/plasm.py b/plaso/frontend/plasm.py new file mode 100755 index 0000000..0e756aa --- /dev/null +++ b/plaso/frontend/plasm.py @@ -0,0 +1,832 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the plasm front-end to plaso.""" + +import argparse +import hashlib +import logging +import operator +import os +import pickle +import sets +import sys +import textwrap + +from plaso import filters + +from plaso.frontend import frontend +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import output as output_lib +from plaso.lib import storage +from plaso.output import pstorage # pylint: disable=unused-import + + +class PlasmFrontend(frontend.AnalysisFrontend): + """Class that implements the psort front-end.""" + + def __init__(self): + """Initializes the front-end object.""" + input_reader = frontend.StdinFrontendInputReader() + output_writer = frontend.StdoutFrontendOutputWriter() + + super(PlasmFrontend, self).__init__(input_reader, output_writer) + + self._cluster_closeness = None + self._cluster_threshold = None + self._quiet = False + self._tagging_file_path = None + + self.mode = None + + def ClusterEvents(self): + """Clusters the event objects in the storage file.""" + clustering_engine = ClusteringEngine( + self._storage_file_path, self._cluster_threshold, + self._cluster_closeness) + clustering_engine.Run() + + def GroupEvents(self): + """Groups the event objects in the storage file. + + Raises: + RuntimeError: if a non-recoverable situation is encountered. + """ + if not self._quiet: + self._output_writer.Write(u'Grouping tagged events.\n') + + try: + storage_file = self.OpenStorageFile(read_only=False) + except IOError as exception: + raise RuntimeError( + u'Unable to open storage file: {0:s} with error: {1:s}.'.format( + self._storage_file_path, exception)) + + grouping_engine = GroupingEngine() + grouping_engine.Run(storage_file, quiet=self._quiet) + storage_file.Close() + + if not self._quiet: + self._output_writer.Write(u'Grouping DONE.\n') + + def TagEvents(self): + """Tags the event objects in the storage file.""" + tagging_engine = TaggingEngine( + self._storage_file_path, self._tagging_file_path, quiet=self._quiet) + tagging_engine.Run() + + def ParseOptions(self, options): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Raises: + BadConfigOption: if the options are invalid. 
+ """ + super(PlasmFrontend, self).ParseOptions(options) + + self.mode = getattr(options, 'subcommand', None) + if not self.mode: + raise errors.BadConfigOption(u'Missing mode subcommand.') + + if self.mode not in ['cluster', 'group', 'tag']: + raise errors.BadConfigOption( + u'Unsupported mode subcommand: {0:s}.'.format(self.mode)) + + if self.mode == 'cluster': + self._cluster_threshold = getattr(options, 'cluster_threshold', None) + if not self._cluster_threshold: + raise errors.BadConfigOption(u'Missing cluster threshold value.') + + try: + self._cluster_threshold = int(self._cluster_threshold, 10) + except ValueError: + raise errors.BadConfigOption(u'Invalid cluster threshold value.') + + self._cluster_closeness = getattr(options, 'cluster_closeness', None) + if not self._cluster_closeness: + raise errors.BadConfigOption(u'Missing cluster closeness value.') + + try: + self._cluster_closeness = int(self._cluster_closeness, 10) + except ValueError: + raise errors.BadConfigOption(u'Invalid cluster closeness value.') + + elif self.mode == 'tag': + tagging_file_path = getattr(options, 'tag_filename', None) + if not tagging_file_path: + raise errors.BadConfigOption(u'Missing tagging file path.') + + if not os.path.isfile(tagging_file_path): + errors.BadConfigOption( + u'No such tagging file: {0:s}'.format(tagging_file_path)) + + self._tagging_file_path = tagging_file_path + + +def SetupStorage(input_file_path, pre_obj=None): + """Sets up the storage object. + + Attempts to initialize a storage file. If we fail on a IOError, for which + a common cause are typos, log a warning and gracefully exit. + + Args: + input_file_path: Filesystem path to the plaso storage container. + pre_obj: A plaso preprocessing object. + + Returns: + A storage.StorageFile object. + """ + try: + return storage.StorageFile( + input_file_path, pre_obj=pre_obj, read_only=False) + except IOError as exception: + logging.error(u'IO ERROR: {0:s}'.format(exception)) + else: + logging.error(u'Other Critical Failure Reading Files') + sys.exit(1) + + +def EventObjectGenerator(plaso_storage, quiet=False): + """Yields EventObject objects. + + Yields event_objects out of a StorageFile object. If the 'quiet' argument + is not present, it also outputs 50 '.'s indicating progress. + + Args: + plaso_storage: a storage.StorageFile object. + quiet: boolean value indicating whether to suppress progress output. + + Yields: + EventObject objects. + """ + total_events = plaso_storage.GetNumberOfEvents() + if total_events > 0: + events_per_dot = operator.floordiv(total_events, 80) + counter = 0 + else: + quiet = True + + event_object = plaso_storage.GetSortedEntry() + while event_object: + if not quiet: + counter += 1 + if counter % events_per_dot == 0: + sys.stdout.write(u'.') + sys.stdout.flush() + yield event_object + event_object = plaso_storage.GetSortedEntry() + + +def ParseTaggingFile(tag_input): + """Parses Tagging Input file. + + Parses a tagging input file and returns a dictionary of tags, where each + key represents a tag and each entry is a list of plaso filters. + + Args: + tag_input: filesystem path to the tagging input file. + + Returns: + A dictionary whose keys are tags and values are EventObjectFilter objects. 
+ """ + with open(tag_input, 'rb') as tag_input_file: + tags = {} + current_tag = u'' + for line in tag_input_file: + line_rstrip = line.rstrip() + line_strip = line_rstrip.lstrip() + if not line_strip or line_strip.startswith(u'#'): + continue + if not line_rstrip[0].isspace(): + current_tag = line_rstrip + tags[current_tag] = [] + else: + if not current_tag: + continue + compiled_filter = filters.GetFilter(line_strip) + if compiled_filter: + if compiled_filter not in tags[current_tag]: + tags[current_tag].append(compiled_filter) + else: + logging.warning(u'Tag "{0:s}" contains invalid filter: {1:s}'.format( + current_tag, line_strip)) + return tags + + +class TaggingEngine(object): + """Class that defines a tagging engine.""" + + def __init__(self, target_filename, tag_input, quiet=False): + """Initializes the tagging engine object. + + Args: + target_filename: filename for a Plaso storage file to be tagged. + tag_input: filesystem path to the tagging input file. + quiet: Optional boolean value to indicate the progress output should + be suppressed. The default is False. + """ + self.target_filename = target_filename + self.tag_input = tag_input + self._quiet = quiet + + def Run(self): + """Iterates through a Plaso Store file, tagging events according to the + tagging input file specified on the command line. It writes the tagging + information to the Plaso Store file.""" + pre_obj = event.PreprocessObject() + pre_obj.collection_information = {} + pre_obj.collection_information['file_processed'] = self.target_filename + pre_obj.collection_information['method'] = u'Applying tags.' + pre_obj.collection_information['tag_file'] = self.tag_input + pre_obj.collection_information['tagging_engine'] = u'plasm' + + if not self._quiet: + sys.stdout.write(u'Applying tags...\n') + with SetupStorage(self.target_filename, pre_obj) as store: + tags = ParseTaggingFile(self.tag_input) + num_tags = 0 + event_tags = [] + for event_object in EventObjectGenerator(store, self._quiet): + matched_tags = [] + for tag, my_filters in tags.iteritems(): + for my_filter in my_filters: + if my_filter.Match(event_object): + matched_tags.append(tag) + # Don't want to evaluate other tags once a tag is discovered. + break + if len(matched_tags) > 0: + event_tag = event.EventTag() + event_tag.store_number = getattr(event_object, 'store_number') + event_tag.store_index = getattr(event_object, 'store_index') + event_tag.comment = u'Tag applied by PLASM tagging engine' + event_tag.tags = matched_tags + event_tags.append(event_tag) + num_tags += 1 + store.StoreTagging(event_tags) + + if not self._quiet: + sys.stdout.write(u'DONE (applied {} tags)\n'.format(num_tags)) + + +class GroupingEngine(object): + """Class that defines a grouping engine.""" + + def _GroupEvents(self, storage_file, tags, quiet=False): + """Separates each tag list into groups, and writes them to the Plaso Store. + + Args: + storage_file: the storage file (instance of StorageFile). + tags: dictionary of the form {tag: [event_object, ...]}. + quiet: suppress the progress output (default: False). + """ + # TODO(ojensen): make this smarter - for now, separates via time interval. + time_interval = 1000000 # 1 second. + groups = [] + for tag in tags: + if not quiet: + sys.stdout.write(u' proccessing tag "{0:s}"...\n'.format(tag)) + locations = tags[tag] + last_time = 0 + groups_in_tag = 0 + for location in locations: + store_number, store_index = location + # TODO(ojensen): getting higher number event_objects seems to be slow. 
+ event_object = storage_file.GetEventObject(store_number, store_index) + if not hasattr(event_object, 'timestamp'): + continue + timestamp = getattr(event_object, 'timestamp') + if timestamp - last_time > time_interval: + groups_in_tag += 1 + groups.append(type('obj', (object,), { + 'name': u'{0:s}:{1:d}'.format(tag, groups_in_tag), + 'category': tag, + 'events': [location]})) + else: + groups[-1].events.append(location) + last_time = timestamp + + return groups + + # TODO: move this functionality to storage. + def _ReadTags(self, storage_file): + """Iterates through an opened Plaso Store, creating a dictionary of tags + pointing to a list of events. + + Args: + storage_file: the storage file (instance of StorageFile). + """ + all_tags = {} + for event_tag in storage_file.GetTagging(): + tags = event_tag.tags + location = (event_tag.store_number, event_tag.store_index) + for tag in tags: + if tag in all_tags: + all_tags[tag].append(location) + else: + all_tags[tag] = [location] + return all_tags + + def Run(self, storage_file, quiet=False): + """Iterates through a tagged Plaso Store file, grouping events with the same + tag into groups indicating a single instance of an action. It writes the + grouping information to the Plaso Store file. + + Args: + storage_file: the storage file (instance of StorageFile). + quiet: Optional boolean value to indicate the progress output should + be suppressed. The default is False. + """ + if not storage_file.HasTagging(): + logging.error(u'Plaso storage file does not contain tagged events') + return + + tags = self._ReadTags(storage_file) + groups = self._GroupEvents(storage_file, tags, quiet) + + storage_file.StoreGrouping(groups) + + +class ClusteringEngine(object): + """Clusters events in a Plaso Store to assist Tag Input creation. + + Most methods in this class are staticmethods, to avoid relying excessively on + internal state, and to maintain a clear description of which method acts on + what data. + """ + + IGNORE_BASE = frozenset([ + 'hostname', 'timestamp_desc', 'plugin', 'parser', 'user_sid', + 'registry_type', 'computer_name', 'offset', 'allocated', 'file_size', + 'record_number']) + + def __init__(self, target_filename, threshold, closeness): + """Constructor for the Clustering Engine. + + Args: + target_filename: filename for a Plaso storage file to be clustered. + threshold: support threshold for pruning attributes and event types. + closeness: number of milliseconds to cut off the closeness function. + """ + self.target_filename = target_filename + self.threshold = threshold + self.closeness = closeness + sys.stdout.write("Support threshold: {0:d}\nCloseness: {1:d}ms\n\n".format( + threshold, closeness)) + + self.ignore = False + self.frequent_words = [] + self.vector_size = 20000 + + @staticmethod + def HashFile(filename, block_size=2**20): + """Calculates an md5sum of a file from a given filename. + + Returns an MD5 (hash) in ASCII characters, used for naming incremental + progress files that are written to disk. + + Args: + filename: the file to be hashed. + block_size: (optional) block size. + """ + md5 = hashlib.md5() + with open(filename, 'rb') as f: + while True: + data = f.read(block_size) + if not data: + break + md5.update(data) + return md5.hexdigest() + + @staticmethod + def StringJoin(first, second): + """Joins two strings together with a separator. + + In spite of being fairly trivial, this is separated out as a function of + its own to ensure it stays consistent, as it happens in multiple places in + the code. 
+ + Args: + first: first string. + second: second string. + """ + return u':||:'.join([unicode(first), unicode(second)]) + + @staticmethod + def PreHash(field_name, attribute): + """Constructs a string fit to be hashed from an event_object attribute. + + Takes both the attribute's name and value, and produces a consistent string + representation. This string can then be hashed to produce a consistent + name/value hash (see hash_attr). + + Args: + field_name: an event_object attribute name. + attribute: the corresponding event_object attribute. + """ + if type(attribute) in [dict, sets.Set]: + value = repr(sorted(attribute.items())) + else: + value = unicode(attribute) + return ClusteringEngine.StringJoin(field_name, value) + + @staticmethod + def HashAttr(field_name, attribute, vector_size): + """Consistently hashes an event_object attribute/value pair. + + Uses pre_hash to generate a consistent string representation of the + attribute, and then hashes and mods it down to fit within the vector_size. + + Args: + field_name: an event_object attribute name. + attribute: the corresponding event_object attribute. + """ + return hash(ClusteringEngine.PreHash(field_name, attribute)) % vector_size + + @staticmethod + def EventRepresentation(event_object, ignore, frequent_words=None): + """Constructs a consistent representation of an event_object. + + Returns a dict representing our view of an event_object, stripping out + attributes we ignore. If the frequent_words parameter is set, this strips + out any attribute not listed therein as well. Attribute list order is + undefined, i.e. event_object list attributes are treated as sets instead of + lists. + + Args: + event_object: a Plaso event_object. + ignore: a list or set of event_object attributes to ignore. + frequent_words: (optional) whitelist of attributes not to ignore. + """ + if not frequent_words: + frequent_words = [] + + event_field_names = event_object.GetAttributes().difference(ignore) + representation = {} + for field_name in event_field_names: + attribute = getattr(event_object, field_name) + if hasattr(attribute, '__iter__'): + if isinstance(attribute, dict): + indices = sorted(attribute.keys()) + else: + indices = range(len(attribute)) + for index in indices: + # quick fix to ignore list order. + index_identifier = index if isinstance(attribute, dict) else '' + subfield_name = ':plasm-sub:'.join( + [field_name, unicode(index_identifier)]) + if not frequent_words or ClusteringEngine.StringJoin( + subfield_name, attribute[index]) in frequent_words: + representation[subfield_name] = attribute[index] + else: + if not frequent_words or ClusteringEngine.StringJoin( + field_name, attribute) in frequent_words: + representation[field_name] = attribute + return representation + + def EventObjectRepresentationGenerator(self, filename, frequent_words=None): + """Yields event_representations. + + Yields event_representations from a plaso store. Essentially it simply wraps + the EventObjectGenerator and yields event_representations of the resulting + event_objects. If frequent_words is set, the event representation will + exclude any attributes not listed in the frequent_words list. + + Args: + filename: a Plaso Store filename. + frequent_words: (optional) whitelist of attributes not to ignore. 
+ """ + with SetupStorage(filename) as store: + for event_object in EventObjectGenerator(store): + if not self.ignore: + self.ignore = event_object.COMPARE_EXCLUDE.union(self.IGNORE_BASE) + yield ClusteringEngine.EventRepresentation( + event_object, self.ignore, frequent_words) + + def NoDuplicates(self, dump_filename): + """Saves a de-duped Plaso Storage. + + This goes through the Plaso storage file, and saves a new dump with + duplicates removed. The filename is '.[dump_hash]_dedup', and is returned + at the end of the function. Note that if this function is interrupted, + incomplete results are recorded and this file must be deleted or subsequent + runs will use this incomplete data. + + Args: + dump_filename: the filename of the Plaso Storage to be deduped. + """ + sys.stdout.write(u'Removing duplicates...\n') + sys.stdout.flush() + # Whether these incremental files should remain a feature or not is still + # being decided. They're just here for now to make development faster. + nodup_filename = '.{}_dedup'.format(self.plaso_hash) + if os.path.isfile(nodup_filename): + sys.stdout.write(u'Using previously calculated results.\n') + else: + with SetupStorage(dump_filename) as store: + total_events = store.GetNumberOfEvents() + events_per_dot = operator.floordiv(total_events, 80) + formatter_cls = output_lib.GetOutputFormatter('Pstorage') + store_dedup = open(nodup_filename, 'wb') + formatter = formatter_cls(store, store_dedup) + with output_lib.EventBuffer( + formatter, check_dedups=True) as output_buffer: + event_object = formatter.FetchEntry() + counter = 0 + while event_object: + output_buffer.Append(event_object) + counter += 1 + if counter % events_per_dot == 0: + sys.stdout.write(u'.') + sys.stdout.flush() + event_object = formatter.FetchEntry() + sys.stdout.write(u'\n') + return nodup_filename + + def ConstructHashVector(self, nodup_filename, vector_size): + """Constructs the vector which tallies the hashes of attributes. + + The purpose of this vector is to save memory. Since many attributes are + fairly unique, we first hash them and keep a count of how many times the + hash appears. Later when constructing our vocabulary, we can ignore any + attributes whose hash points to a value in this vector smaller than the + support threshold value, since we are guaranteed that it appears in the + data at most this tally number of times. + + Args: + nodup_filename: the filename of a de-duplicated plaso storage file. + vector_size: size of this vector. + """ + sys.stdout.write(u'Constructing word vector...\n') + sys.stdout.flush() + vector_filename = '.{0:s}_vector_{1:s}'.format( + self.plaso_hash, vector_size) + if os.path.isfile(vector_filename): + sys.stdout.write(u'Using previously calculated results.\n') + x = open(vector_filename, 'rb') + vector = pickle.load(x) + x.close() + else: + vector = [0]*vector_size + for representation in self.EventObjectRepresentationGenerator( + nodup_filename): + for field_name, attribute in representation.iteritems(): + index = ClusteringEngine.HashAttr(field_name, attribute, vector_size) + vector[index] += 1 + x = open(vector_filename, 'wb') + pickle.dump(vector, x) + x.close() + sys.stdout.write(u'\n') + return vector + + def FindFrequentWords(self, nodup_filename, threshold, vector=None): + """Constructs a list of attributes which appear "often". + + This goes through a plaso store, and finds all name-attribute pairs which + appear no less than the support threshold value number of times. 
+ available it uses the hash vector in order to ignore attributes and save
+ memory.
+
+ Args:
+ nodup_filename: the filename of a de-duplicated plaso storage file.
+ threshold: the support threshold value.
+ vector: (optional) vector of hash tallies.
+ """
+ if not vector:
+ vector = []
+
+ sys.stdout.write(u'Constructing 1-dense clusters... \n')
+ sys.stdout.flush()
+ frequent_filename = '.{0:s}_freq_{1:s}'.format(
+ self.plaso_hash, str(threshold))
+ if os.path.isfile(frequent_filename):
+ sys.stdout.write(u'Using previously calculated results.\n')
+ x = open(frequent_filename, 'rb')
+ frequent_words = pickle.load(x)
+ x.close()
+ else:
+ word_count = {}
+ vector_size = len(vector)
+ for representation in self.EventObjectRepresentationGenerator(
+ nodup_filename):
+ for field_name, attribute in representation.iteritems():
+ word = ClusteringEngine.PreHash(field_name, attribute)
+ # Only index the vector when it is non-empty, avoiding a modulo
+ # by zero when no tally vector was supplied.
+ if not vector_size or vector[hash(word) % vector_size] > threshold:
+ if word in word_count:
+ word_count[word] += 1
+ else:
+ word_count[word] = 1
+ wordlist = [word for word in word_count if word_count[word] >= threshold]
+ frequent_words = sets.Set(wordlist)
+ x = open(frequent_filename, 'wb')
+ pickle.dump(frequent_words, x)
+ x.close()
+ sys.stdout.write(u'\n')
+ return frequent_words
+
+ def BuildEventTypes(self, nodup_filename, threshold, frequent_words):
+ """Builds out the event_types from the frequent attributes.
+
+ This uses the frequent words set in order to ignore attributes from plaso
+ events and thereby create event_types (events which have infrequent
+ attributes ignored). Currently event types which do not appear at least
+ as often as the support threshold dictates are ignored, although whether
+ this is what we actually want is still under consideration. Returns the
+ list of event types, as well as a reverse-lookup structure.
+
+ Args:
+ nodup_filename: the filename of a de-duplicated plaso storage file.
+ threshold: the support threshold value.
+ frequent_words: the set of attributes not to ignore.
+ """
+ sys.stdout.write(u'Calculating event type candidates...\n')
+ sys.stdout.flush()
+ eventtype_filename = ".{0:s}_evtt_{1:s}".format(
+ self.plaso_hash, str(threshold))
+ if os.path.isfile(eventtype_filename):
+ sys.stdout.write(u'Using previously calculated results.\n')
+ x = open(eventtype_filename, 'rb')
+ evttypes = pickle.load(x)
+ evttype_indices = pickle.load(x)
+ x.close()
+ else:
+ evttype_candidates = {}
+ for representation in self.EventObjectRepresentationGenerator(
+ nodup_filename, frequent_words=frequent_words):
+ candidate = repr(representation)
+ if candidate in evttype_candidates:
+ evttype_candidates[candidate] += 1
+ else:
+ evttype_candidates[candidate] = 1
+ sys.stdout.write(u'\n')
+ # clean up memory a little
+ sys.stdout.write(u'Pruning event type candidates...')
+ sys.stdout.flush()
+ evttypes = []
+ evttype_indices = {}
+ for candidate, score in evttype_candidates.iteritems():
+ # Keep only candidates that meet the support threshold; infrequent
+ # candidates are ignored, as the docstring describes.
+ if score >= threshold:
+ evttype_indices[candidate] = len(evttypes)
+ evttypes.append(candidate)
+ del evttype_candidates
+
+ # write everything out
+ x = open(eventtype_filename, 'wb')
+ pickle.dump(evttypes, x)
+ pickle.dump(evttype_indices, x)
+ x.close()
+ sys.stdout.write(u'\n')
+ return (evttypes, evttype_indices)
+
+ def Run(self):
+ """Iterates through a tagged Plaso Store file, attempting to cluster events
+ into groups that tend to happen together, to help create Tag Input files.
+ Future work includes the ability to parse multiple Plaso Store files at
+ once.
By default this will write incremental progress to dotfiles in the + current directory.""" + self.plaso_hash = ClusteringEngine.HashFile(self.target_filename) + self.nodup_filename = self.NoDuplicates(self.target_filename) + self.vector = self.ConstructHashVector( + self.nodup_filename, self.vector_size) + self.frequent_words = self.FindFrequentWords( + self.nodup_filename, self.threshold, self.vector) + (self.event_types, self.event_type_indices) = self.BuildEventTypes( + self.nodup_filename, self.threshold, self.frequent_words) + # Next step, clustering the event types + + # TODO: implement clustering. + + +def Main(): + """The main application function.""" + front_end = PlasmFrontend() + + epilog_tag = (""" + Notes: + + When applying tags, a tag input file must be given. Currently, + the format of this file is simply the tag name, followed by + indented lines indicating conditions for the tag, treating any + lines beginning with # as comments. For example, a valid tagging + input file might look like this:' + + ------------------------------ + Obvious Malware + # anything with 'malware' in the name or path + filename contains 'malware' + + # anything with the malware datatype + datatype is 'windows:malware:this_is_not_a_real_datatype' + + File Download + timestamp_desc is 'File Downloaded' + ------------------------------ + + Tag files can be found in the "extra" directory of plaso. + """) + + epilog_group = (""" + When applying groups, the Plaso storage file *must* contain tags, + as only tagged events are grouped. Plasm can be run such that it + both applies tags and applies groups, in which case an untagged + Plaso storage file may be used, since tags will be applied before + the grouping is calculated. + """) + + epilog_main = (""" + For help with a specific action, use "plasm.py {cluster,group,tag} -h". 
+ """) + + description = ( + u'PLASM (Plaso Langar Ad Safna Minna)- Application to tag and group ' + u'Plaso storage files.') + + arg_parser = argparse.ArgumentParser( + description=textwrap.dedent(description), + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(epilog_main)) + + arg_parser.add_argument( + '-q', '--quiet', action='store_true', dest='quiet', default=False, + help='Suppress nonessential output.') + + subparsers = arg_parser.add_subparsers(dest='subcommand') + + cluster_subparser = subparsers.add_parser( + 'cluster', formatter_class=argparse.RawDescriptionHelpFormatter) + + cluster_subparser.add_argument( + '--closeness', action='store', type=int, metavar='MSEC', + dest='cluster_closeness', default=5000, help=( + 'Number of miliseconds before we stop considering two ' + 'events to be at all "close" to each other')) + + cluster_subparser.add_argument( + '--threshold', action='store', type=int, metavar='NUMBER', + dest='cluster_threshold', default=5, + help='Support threshold for pruning attributes.') + + front_end.AddStorageFileOptions(cluster_subparser) + + group_subparser = subparsers.add_parser( + 'group', formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(epilog_group)) + + front_end.AddStorageFileOptions(group_subparser) + + tag_subparser = subparsers.add_parser( + 'tag', formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(epilog_tag)) + + tag_subparser.add_argument( + '--tagfile', '--tag_file', '--tag-file', action='store', type=unicode, + metavar='FILE', dest='tag_filename', help=( + 'Name of the file containing a description of tags and rules ' + 'for tagging events.')) + + front_end.AddStorageFileOptions(tag_subparser) + + options = arg_parser.parse_args() + + try: + front_end.ParseOptions(options) + except errors.BadConfigOption as exception: + arg_parser.print_help() + print u'' + logging.error(u'{0:s}'.format(exception)) + return False + + if front_end.mode == 'cluster': + front_end.ClusterEvents() + + elif front_end.mode == 'group': + front_end.GroupEvents() + + elif front_end.mode == 'tag': + front_end.TagEvents() + + return True + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/plaso/frontend/plasm_test.py b/plaso/frontend/plasm_test.py new file mode 100644 index 0000000..fd73236 --- /dev/null +++ b/plaso/frontend/plasm_test.py @@ -0,0 +1,195 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the plasm front-end."""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+from plaso.engine import queue
+from plaso.frontend import plasm
+from plaso.frontend import test_lib
+from plaso.lib import event
+from plaso.lib import pfilter
+from plaso.lib import storage
+from plaso.multi_processing import multi_process
+
+
+class TestEvent(event.EventObject):
+  DATA_TYPE = 'test:plasm:1'
+
+  def __init__(self, timestamp, filename='/dev/null', stuff='bar'):
+    super(TestEvent, self).__init__()
+    self.timestamp = timestamp
+    self.filename = filename
+    self.timestamp_desc = 'Last Written'
+    self.parser = 'TestEvent'
+    self.display_name = 'fake:{}'.format(filename)
+    self.stuff = stuff
+
+
+class PlasmTest(test_lib.FrontendTestCase):
+  """Tests for the plasm front-end."""
+
+  def setUp(self):
+    """Sets up the objects used throughout the test."""
+    self._temp_directory = tempfile.mkdtemp()
+    self._storage_filename = os.path.join(self._temp_directory, 'plaso.db')
+    self._tag_input_filename = os.path.join(self._temp_directory, 'input1.tag')
+
+    tag_input_file = open(self._tag_input_filename, 'wb')
+    tag_input_file.write('\n'.join([
+        'Test Tag',
+        '  filename contains \'/tmp/whoaaaa\'',
+        '  parser is \'TestEvent\' and stuff is \'dude\'']))
+    tag_input_file.close()
+
+    pfilter.TimeRangeCache.ResetTimeConstraints()
+
+    # TODO: add upper queue limit.
+    test_queue = multi_process.MultiProcessingQueue()
+    test_queue_producer = queue.ItemQueueProducer(test_queue)
+    test_queue_producer.ProduceItems([
+        TestEvent(0),
+        TestEvent(1000),
+        TestEvent(2000000, '/tmp/whoaaaaa'),
+        TestEvent(2500000, '/tmp/whoaaaaa'),
+        TestEvent(5000000, '/tmp/whoaaaaa', 'dude')])
+    test_queue_producer.SignalEndOfInput()
+
+    storage_writer = storage.StorageFileWriter(
+        test_queue, self._storage_filename)
+    storage_writer.WriteEventObjects()
+
+    self._storage_file = storage.StorageFile(self._storage_filename)
+    self._storage_file.SetStoreLimit()
+
+  def tearDown(self):
+    """Cleans up the objects used throughout the test."""
+    shutil.rmtree(self._temp_directory, True)
+
+  def testTagParsing(self):
+    """Test if plasm can parse tagging input files."""
+    tags = plasm.ParseTaggingFile(self._tag_input_filename)
+    self.assertEquals(len(tags), 1)
+    self.assertTrue('Test Tag' in tags)
+    self.assertEquals(len(tags['Test Tag']), 2)
+
+  def testInvalidTagParsing(self):
+    """Test what happens when tagging input files contain invalid conditions."""
+    tag_input_filename = os.path.join(self._temp_directory, 'input2.tag')
+
+    tag_input_file = open(tag_input_filename, 'wb')
+    tag_input_file.write('\n'.join([
+        'Invalid Tag', '  my hovercraft is full of eels']))
+    tag_input_file.close()
+
+    tags = plasm.ParseTaggingFile(tag_input_filename)
+    self.assertEquals(len(tags), 1)
+    self.assertTrue('Invalid Tag' in tags)
+    self.assertEquals(len(tags['Invalid Tag']), 0)
+
+  def testMixedValidityTagParsing(self):
+    """Tagging input file contains a mix of valid and invalid conditions."""
+    tag_input_filename = os.path.join(self._temp_directory, 'input3.tag')
+
+    tag_input_file = open(tag_input_filename, 'wb')
+    tag_input_file.write('\n'.join([
+        'Semivalid Tag', '  filename contains \'/tmp/whoaaaa\'',
+        '  Yandelavasa grldenwi stravenka']))
+    tag_input_file.close()
+
+    tags = plasm.ParseTaggingFile(tag_input_filename)
+    self.assertEquals(len(tags), 1)
+    self.assertTrue('Semivalid Tag' in tags)
+    self.assertEquals(len(tags['Semivalid Tag']), 1)
+
+  def testIteratingOverPlasoStore(self):
+    """Tests the plaso storage iterator."""
+    counter
= 0
+    for _ in plasm.EventObjectGenerator(self._storage_file, quiet=True):
+      counter += 1
+    self.assertEquals(counter, 5)
+
+    self._storage_file.Close()
+
+    pfilter.TimeRangeCache.ResetTimeConstraints()
+    self._storage_file = storage.StorageFile(self._storage_filename)
+    self._storage_file.SetStoreLimit()
+
+    counter = 0
+    for _ in plasm.EventObjectGenerator(self._storage_file, quiet=False):
+      counter += 1
+    self.assertEquals(counter, 5)
+
+  def testTaggingEngine(self):
+    """Tests the Tagging engine's functionality."""
+    self.assertFalse(self._storage_file.HasTagging())
+    tagging_engine = plasm.TaggingEngine(
+        self._storage_filename, self._tag_input_filename, quiet=True)
+    tagging_engine.Run()
+    test = storage.StorageFile(self._storage_filename)
+    self.assertTrue(test.HasTagging())
+    tagging = test.GetTagging()
+    count = 0
+    for tag_event in tagging:
+      count += 1
+      self.assertEquals(tag_event.tags, ['Test Tag'])
+    self.assertEquals(count, 3)
+
+  def testGroupingEngineUntagged(self):
+    """Grouping engine should do nothing if dealing with untagged storage."""
+    storage_file = storage.StorageFile(self._storage_filename, read_only=False)
+    grouping_engine = plasm.GroupingEngine()
+    grouping_engine.Run(storage_file, quiet=True)
+    storage_file.Close()
+
+    storage_file = storage.StorageFile(self._storage_filename, read_only=True)
+
+    self.assertFalse(storage_file.HasGrouping())
+
+    storage_file.Close()
+
+  def testGroupingEngine(self):
+    """Tests the Grouping engine's functionality."""
+    pfilter.TimeRangeCache.ResetTimeConstraints()
+    tagging_engine = plasm.TaggingEngine(
+        self._storage_filename, self._tag_input_filename, quiet=True)
+    tagging_engine.Run()
+
+    storage_file = storage.StorageFile(self._storage_filename, read_only=False)
+    grouping_engine = plasm.GroupingEngine()
+    grouping_engine.Run(storage_file, quiet=True)
+    storage_file.Close()
+
+    storage_file = storage.StorageFile(self._storage_filename, read_only=True)
+
+    storage_file.SetStoreLimit()
+    self.assertTrue(storage_file.HasGrouping())
+    groups = storage_file.GetGrouping()
+    count = 0
+    for group_event in groups:
+      count += 1
+      self.assertEquals(group_event.category, 'Test Tag')
+    self.assertEquals(count, 2)
+
+    storage_file.Close()
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/frontend/pprof.py b/plaso/frontend/pprof.py
new file mode 100755
index 0000000..4a70095
--- /dev/null
+++ b/plaso/frontend/pprof.py
@@ -0,0 +1,364 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test run for a single file and a display of how many events are collected."""
+
+import argparse
+import collections
+import cProfile
+import logging
+import os
+import pstats
+import sys
+import time
+
+from dfvfs.lib import definitions
+from dfvfs.path import factory as path_spec_factory
+from dfvfs.proto import transmission_pb2
+from dfvfs.resolver import resolver as path_spec_resolver
+from dfvfs.serializer import protobuf_serializer
+
+from google.protobuf import text_format
+
+try:
+  # Support version 1.X of IPython.
+  # pylint: disable=no-name-in-module
+  from IPython.terminal.embed import InteractiveShellEmbed
+except ImportError:
+  # Support versions of IPython older than 1.X.
+  # pylint: disable=no-name-in-module
+  from IPython.frontend.terminal.embed import InteractiveShellEmbed
+
+import pyevt
+import pyevtx
+import pylnk
+import pymsiecf
+import pyregf
+
+import plaso
+from plaso.engine import engine
+from plaso.engine import queue
+from plaso.engine import single_process
+from plaso.frontend import psort
+from plaso.frontend import utils as frontend_utils
+
+
+# TODO: Remove this after the dfVFS integration.
+# TODO: Make sure we don't need to implement the method _ConsumeItem, or
+# to have that not as an abstract method.
+# pylint: disable=abstract-method
+class PprofEventObjectQueueConsumer(queue.EventObjectQueueConsumer):
+  """Class that implements an event object queue consumer for pprof."""
+
+  def __init__(self, queue_object):
+    """Initializes the queue consumer.
+
+    Args:
+      queue_object: the queue object (instance of Queue).
+    """
+    super(PprofEventObjectQueueConsumer, self).__init__(queue_object)
+    self.counter = collections.Counter()
+    self.parsers = []
+    self.plugins = []
+
+  def _ConsumeEventObject(self, event_object, **unused_kwargs):
+    """Consumes an event object callback for ConsumeEventObject."""
+    parser = getattr(event_object, 'parser', u'N/A')
+    if parser not in self.parsers:
+      self.parsers.append(parser)
+
+    plugin = getattr(event_object, 'plugin', u'N/A')
+    if plugin not in self.plugins:
+      self.plugins.append(plugin)
+
+    self.counter[parser] += 1
+    if plugin != u'N/A':
+      self.counter[u'[Plugin] {}'.format(plugin)] += 1
+    self.counter['Total'] += 1
+
+
+def PrintHeader(options):
+  """Print header information, including library versions."""
+  print frontend_utils.FormatHeader('File Parsed')
+  print u'{:>20s}'.format(options.file_to_parse)
+
+  print frontend_utils.FormatHeader('Versions')
+  print frontend_utils.FormatOutputString('plaso engine', plaso.GetVersion())
+  print frontend_utils.FormatOutputString('pyevt', pyevt.get_version())
+  print frontend_utils.FormatOutputString('pyevtx', pyevtx.get_version())
+  print frontend_utils.FormatOutputString('pylnk', pylnk.get_version())
+  print frontend_utils.FormatOutputString('pymsiecf', pymsiecf.get_version())
+  print frontend_utils.FormatOutputString('pyregf', pyregf.get_version())
+
+  if options.filter:
+    print frontend_utils.FormatHeader('Filter Used')
+    print frontend_utils.FormatOutputString('Filter String', options.filter)
+
+  if options.parsers:
+    print frontend_utils.FormatHeader('Parser Filter Used')
+    print frontend_utils.FormatOutputString('Parser String', options.parsers)
+
+
+def ProcessStorage(options):
+  """Process a storage file and produce profile results.
+
+  Args:
+    options: the command line arguments (instance of argparse.Namespace).
+
+  Returns:
+    The profiling statistics or None on error.
+  """
+  storage_parameters = options.storage.split()
+  storage_parameters.append(options.file_to_parse)
+
+  if options.filter:
+    storage_parameters.append(options.filter)
+
+  if options.verbose:
+    # TODO: why not move this functionality into psort?
+    profiler = cProfile.Profile()
+    profiler.enable()
+  else:
+    time_start = time.time()
+
+  # Call psort and process output.
+  return_value = psort.Main(storage_parameters)
+
+  if options.verbose:
+    profiler.disable()
+  else:
+    time_end = time.time()
+
+  if return_value:
+    print u'Parsed storage file.'
+  else:
+    print u'It appears the storage file may not have processed correctly.'
+
+  if options.verbose:
+    return GetStats(profiler)
+  else:
+    print frontend_utils.FormatHeader('Time Used')
+    print u'{:>20f}s'.format(time_end - time_start)
+
+
+def ProcessFile(options):
+  """Process a file and produce profile results."""
+  if options.proto_file and os.path.isfile(options.proto_file):
+    with open(options.proto_file) as fh:
+      proto_string = fh.read()
+
+      proto = transmission_pb2.PathSpec()
+      try:
+        text_format.Merge(proto_string, proto)
+      except text_format.ParseError as exception:
+        logging.error(u'Unable to parse file, error: {}'.format(exception))
+        sys.exit(1)
+
+      serializer = protobuf_serializer.ProtobufPathSpecSerializer
+      path_spec = serializer.ReadSerializedObject(proto)
+  else:
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_OS, location=options.file_to_parse)
+
+  file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+  if file_entry is None:
+    logging.error(u'Unable to open file: {0:s}'.format(options.file_to_parse))
+    sys.exit(1)
+
+  # Set a few options the engine expects to be there.
+  # TODO: Can we rather set this directly in argparse?
+  options.single_process = True
+  options.debug = False
+  options.text_prepend = u''
+
+  # Set up the engine.
+  # TODO: refactor and add queue limit.
+  collection_queue = single_process.SingleProcessQueue()
+  storage_queue = single_process.SingleProcessQueue()
+  parse_error_queue = single_process.SingleProcessQueue()
+  engine_object = engine.BaseEngine(
+      collection_queue, storage_queue, parse_error_queue)
+
+  # Create a worker.
+  worker_object = engine_object.CreateExtractionWorker('0')
+  # TODO: add support for parser_filter_string.
+  worker_object.InitalizeParserObjects()
+
+  if options.verbose:
+    profiler = cProfile.Profile()
+    profiler.enable()
+  else:
+    time_start = time.time()
+  worker_object.ParseFileEntry(file_entry)
+
+  if options.verbose:
+    profiler.disable()
+  else:
+    time_end = time.time()
+
+  engine_object.SignalEndOfInputStorageQueue()
+
+  event_object_consumer = PprofEventObjectQueueConsumer(storage_queue)
+  event_object_consumer.ConsumeEventObjects()
+
+  if not options.verbose:
+    print frontend_utils.FormatHeader('Time Used')
+    print u'{:>20f}s'.format(time_end - time_start)
+
+  print frontend_utils.FormatHeader('Parsers Loaded')
+  # Accessing protected member.
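+  # The protected _parser_objects member is read below only to report which
+  # parsers and plugins the worker has loaded; nothing is modified.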
+  # pylint: disable=protected-access
+  plugins = []
+  for parser_object in sorted(worker_object._parser_objects):
+    print frontend_utils.FormatOutputString('', parser_object.NAME)
+    parser_plugins = getattr(parser_object, '_plugins', [])
+    plugins.extend(parser_plugins)
+
+  print frontend_utils.FormatHeader('Plugins Loaded')
+  for plugin in sorted(plugins):
+    if isinstance(plugin, basestring):
+      print frontend_utils.FormatOutputString('', plugin)
+    else:
+      plugin_string = getattr(plugin, 'NAME', u'N/A')
+      print frontend_utils.FormatOutputString('', plugin_string)
+
+  print frontend_utils.FormatHeader('Parsers Used')
+  for parser in sorted(event_object_consumer.parsers):
+    print frontend_utils.FormatOutputString('', parser)
+
+  print frontend_utils.FormatHeader('Plugins Used')
+  for plugin in sorted(event_object_consumer.plugins):
+    print frontend_utils.FormatOutputString('', plugin)
+
+  print frontend_utils.FormatHeader('Counter')
+  for key, value in event_object_consumer.counter.most_common():
+    print frontend_utils.FormatOutputString(key, value)
+
+  if options.verbose:
+    return GetStats(profiler)
+
+
+def GetStats(profiler):
+  """Print verbose information from profiler and return a stats object."""
+  stats = pstats.Stats(profiler, stream=sys.stdout)
+  print frontend_utils.FormatHeader('Profiler')
+
+  print '\n{:-^20}'.format(' Top 10 Time Spent ')
+  stats.sort_stats('cumulative')
+  stats.print_stats(10)
+
+  print '\n{:-^20}'.format(' Sorted By Function Calls ')
+  stats.sort_stats('calls')
+  stats.print_stats()
+
+  return stats
+
+
+def Main():
+  """Start the tool."""
+  usage = (
+      u'Run this tool against a single file to see how many events are '
+      u'extracted from it and which parsers recognize it.')
+
+  arg_parser = argparse.ArgumentParser(description=usage)
+
+  format_str = '[%(levelname)s] %(message)s'
+  logging.basicConfig(level=logging.INFO, format=format_str)
+
+  arg_parser.add_argument(
+      '-v', '--verbose', dest='verbose', action='store_true', default=False,
+      help=(
+          'Be extra verbose in the information printed out (include full '
+          'stats).'))
+
+  arg_parser.add_argument(
+      '-c', '--console', dest='console', action='store_true',
+      default=False, help='After processing drop to an interactive shell.')
+
+  arg_parser.add_argument(
+      '-p', '--parsers', dest='parsers', action='store', default='', type=str,
+      help='A list of parsers to include (see log2timeline documentation).')
+
+  arg_parser.add_argument(
+      '--proto', dest='proto_file', action='store', default='', type=unicode,
+      metavar='PROTO_FILE', help=(
+          'A file containing an ASCII PathSpec protobuf describing how to '
+          'open up the file for parsing.'))
+
+  arg_parser.add_argument(
+      '-s', '--storage', dest='storage', action='store', type=unicode,
+      metavar='PSORT_PARAMETER', default='', help=(
+          'Run the profiler against a storage file, with the parameters '
+          'provided with this option, e.g.: "-q -w /dev/null". The storage '
+          'file has to be passed in as the FILE_TO_PARSE argument to the '
+          'tool and filters are also optional. This is equivalent to calling '
+          'psort.py STORAGE_PARAMETER FILE_TO_PARSE [FILTER], where the '
+          'storage parameters are the ones defined with this option.'))
+
+  # TODO: Add the option of dropping into a python shell that contains the
+  # stats attribute and others, just print out basic information and do the
+  # profiling, then drop into an IPython shell that allows you to work with
+  # the stats object.
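+  # Example invocations (illustrative paths, not part of the tool's output):
+  #   pprof.py -v /cases/registry/NTUSER.DAT
+  #   pprof.py -s "-q -w /dev/null" /cases/storage.plaso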
+
+  arg_parser.add_argument(
+      'file_to_parse', nargs='?', action='store', metavar='FILE_TO_PARSE',
+      default=None, help='A path to the file that is to be parsed.')
+
+  arg_parser.add_argument(
+      'filter', action='store', metavar='FILTER', nargs='?', default=None,
+      help=(
+          'A filter that can be used to filter the dataset before it '
+          'is written into storage. More information about the filters '
+          'and their usage can be found here: http://plaso.kiddaland.'
+          'net/usage/filters'))
+
+  options = arg_parser.parse_args()
+
+  if not (options.file_to_parse or options.proto_file):
+    arg_parser.print_help()
+    print ''
+    arg_parser.print_usage()
+    print ''
+    logging.error('Unable to run without a file to process.')
+    return False
+
+  if options.file_to_parse and not os.path.isfile(options.file_to_parse):
+    logging.error(u'File [{0:s}] needs to exist.'.format(options.file_to_parse))
+    return False
+
+  PrintHeader(options)
+  # Stats attribute used for console sessions.
+  # pylint: disable=unused-variable
+  if options.storage:
+    stats = ProcessStorage(options)
+  else:
+    stats = ProcessFile(options)
+
+  if options.console:
+    ipshell = InteractiveShellEmbed()
+    ipshell.confirm_exit = False
+    ipshell()
+
+  return True
+
+
+if __name__ == '__main__':
+  if not Main():
+    sys.exit(1)
+  else:
+    sys.exit(0)
diff --git a/plaso/frontend/preg.py b/plaso/frontend/preg.py
new file mode 100755
index 0000000..171db6f
--- /dev/null
+++ b/plaso/frontend/preg.py
@@ -0,0 +1,2161 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parse your Windows Registry files using preg.
+
+preg is a simple Windows Registry parser using the plaso Registry plugins and
+image parsing capabilities. It uses the back-end libraries of plaso to read
+raw image files and extract Registry files from VSS and restore points and
+then runs the Registry plugins of plaso against the Registry hive and
+presents the results in a textual format.
+"""
+
+import argparse
+import binascii
+import logging
+import os
+import re
+import sys
+import textwrap
+
+from dfvfs.helpers import file_system_searcher
+from dfvfs.lib import definitions as dfvfs_definitions
+from dfvfs.path import factory as path_spec_factory
+from dfvfs.resolver import resolver as path_spec_resolver
+
+try:
+  # Support version 1.X of IPython.
+  # pylint: disable=no-name-in-module
+  from IPython.terminal.embed import InteractiveShellEmbed
+except ImportError:
+  # pylint: disable=no-name-in-module
+  from IPython.frontend.terminal.embed import InteractiveShellEmbed
+
+import IPython
+from IPython.config.loader import Config
+from IPython.core import magic
+
+import pysmdev
+
+from plaso.artifacts import knowledge_base
+from plaso.engine import queue
+from plaso.engine import single_process
+
+# Import the winreg formatter to register it, adding the option
+# to print event objects using the default formatter.
+# pylint: disable=unused-import
+from plaso.formatters import winreg as winreg_formatter
+
+from plaso.frontend import frontend
+from plaso.frontend import utils as frontend_utils
+from plaso.lib import errors
+from plaso.lib import event
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import context as parsers_context
+from plaso.parsers import manager as parsers_manager
+from plaso.parsers import winreg as winreg_parser
+from plaso.parsers import winreg_plugins  # pylint: disable=unused-import
+from plaso.preprocessors import interface as preprocess_interface
+from plaso.preprocessors import manager as preprocess_manager
+from plaso.winreg import cache
+from plaso.winreg import path_expander as winreg_path_expander
+from plaso.winreg import winregistry
+
+
+# Older versions of IPython don't have a version_info attribute.
+if getattr(IPython, 'version_info', (0, 0, 0)) < (1, 2, 1):
+  raise ImportWarning(
+      'Preg requires at least IPython version 1.2.1.')
+
+
+class ConsoleConfig(object):
+  """Class that contains functions to configure console actions."""
+
+  @classmethod
+  def GetConfig(cls):
+    """Retrieves the IPython config.
+
+    Returns:
+      The IPython config object (instance of
+      IPython.terminal.embed.InteractiveShellEmbed).
+    """
+    try:
+      # The "get_ipython" function does not exist except within an IPython
+      # session.
+      return get_ipython()  # pylint: disable=undefined-variable
+    except NameError:
+      return Config()
+
+  @classmethod
+  def SetPrompt(
+      cls, hive_path=None, config=None, prepend_string=None):
+    """Sets the prompt string on the console.
+
+    Args:
+      hive_path: The hive name or path as a string, this is an optional name
+          or location of the loaded hive. If not defined the name is derived
+          from a default string.
+      config: The IPython configuration object (instance of
+          IPython.terminal.embed.InteractiveShellEmbed), this is optional
+          and is automatically derived if not used.
+      prepend_string: An optional string that can be injected into the prompt
+          just prior to the command count.
+    """
+    if hive_path is None:
+      path_string = u'Unknown hive loaded'
+    else:
+      path_string = hive_path
+
+    prompt_strings = [
+        r'[{color.LightBlue}\T{color.Normal}]',
+        r'{color.LightPurple} ',
+        path_string,
+        r'\n{color.Normal}']
+    if prepend_string is not None:
+      prompt_strings.append(u'{0:s} '.format(prepend_string))
+    prompt_strings.append(r'[{color.Red}\#{color.Normal}] \$ ')
+
+    if config is None:
+      ipython_config = cls.GetConfig()
+    else:
+      ipython_config = config
+
+    try:
+      ipython_config.PromptManager.in_template = r''.join(prompt_strings)
+    except AttributeError:
+      ipython_config.prompt_manager.in_template = r''.join(prompt_strings)
+
+
+class PregCache(object):
+  """Cache storage used for IPython and other aspects of preg."""
+
+  events_from_last_parse = []
+
+  knowledge_base_object = knowledge_base.KnowledgeBase()
+
+  # Parser context, used when parsing Registry keys.
+  parser_context = None
+
+  hive_storage = None
+  shell_helper = None
+
+
+class PregEventObjectQueueConsumer(queue.EventObjectQueueConsumer):
+  """Class that implements a list event object queue consumer."""
+
+  def __init__(self, event_queue):
+    """Initializes the list event object queue consumer.
+
+    Args:
+      event_queue: the event object queue (instance of Queue).
+    """
+    super(PregEventObjectQueueConsumer, self).__init__(event_queue)
+    self.event_objects = []
+
+  def _ConsumeEventObject(self, event_object, **unused_kwargs):
+    """Consumes an event object callback for ConsumeEventObjects.
+
+    Args:
+      event_object: the event object (instance of EventObject).
+    """
+    self.event_objects.append(event_object)
+
+
+class PregFrontend(frontend.ExtractionFrontend):
+  """Class that implements the preg front-end."""
+
+  # All Registry plugins start with "winreg_", thus the Preg library cuts
+  # that part off, both for display and matching. That way a plugin can be
+  # called by the second half of the name, e.g. "userassist" instead of
+  # "winreg_userassist".
+  PLUGIN_UNIQUE_NAME_START = len('winreg_')
+
+  # Define the different run modes.
+  RUN_MODE_CONSOLE = 1
+  RUN_MODE_REG_FILE = 2
+  RUN_MODE_REG_PLUGIN = 3
+  RUN_MODE_REG_KEY = 4
+
+  def __init__(self, output_writer):
+    """Initializes the front-end object."""
+    input_reader = frontend.StdinFrontendInputReader()
+
+    super(PregFrontend, self).__init__(input_reader, output_writer)
+    self._key_path = None
+    self._parse_restore_points = False
+    self._verbose_output = False
+    self.plugins = None
+
+  def GetListOfAllPlugins(self):
+    """Returns information about the supported plugins."""
+    return_strings = []
+    # TODO: replace frontend_utils.FormatHeader by frontend function.
+    return_strings.append(frontend_utils.FormatHeader(u'Supported Plugins'))
+    all_plugins = parsers_manager.ParsersManager.GetWindowsRegistryPlugins()
+
+    return_strings.append(frontend_utils.FormatHeader(u'Key Plugins'))
+    for plugin_obj in all_plugins.GetAllKeyPlugins():
+      return_strings.append(frontend_utils.FormatOutputString(
+          plugin_obj.NAME[self.PLUGIN_UNIQUE_NAME_START:],
+          plugin_obj.DESCRIPTION))
+
+    return_strings.append(frontend_utils.FormatHeader(u'Value Plugins'))
+    for plugin_obj in all_plugins.GetAllValuePlugins():
+      return_strings.append(frontend_utils.FormatOutputString(
+          plugin_obj.NAME[self.PLUGIN_UNIQUE_NAME_START:],
+          plugin_obj.DESCRIPTION))
+
+    return u'\n'.join(return_strings)
+
+  def ParseHive(
+      self, hive_path_or_path_spec, hive_collectors, shell_helper,
+      key_paths=None, use_plugins=None, verbose=False):
+    """Opens a hive file and returns information about parsed keys.
+
+    This function takes a path to a hive and a list of collectors (or
+    none if the Registry file is passed to the tool).
+
+    The function then opens up the hive inside each collector and runs
+    the plugins defined (or all if no plugins are defined) against all
+    the keys supplied to it.
+
+    Args:
+      hive_path_or_path_spec: the full path to the hive file in question, or
+          a path specification (instance of dfvfs.path.path_spec.PathSpec)
+          for it.
+      hive_collectors: A list of collectors to use (instance of
+          dfvfs.helpers.file_system_searcher.FileSystemSearcher).
+      shell_helper: A helper object (instance of PregHelper).
+      key_paths: A list of Registry key paths that are to be parsed.
+      use_plugins: A list of plugins used to parse the key, None if all
+          plugins should be used.
+      verbose: Print more verbose content, like hex dumps of extracted
+          events.
+
+    Returns:
+      A string containing extracted information.
+    """
+    if isinstance(hive_path_or_path_spec, basestring):
+      hive_path_spec = None
+      hive_path = hive_path_or_path_spec
+    else:
+      hive_path_spec = hive_path_or_path_spec
+      hive_path = hive_path_spec.location
+
+    if key_paths is None:
+      key_paths = []
+
+    print_strings = []
+    for name, hive_collector in hive_collectors:
+      # Printing '*' 80 times.
+      print_strings.append(u'*' * 80)
+      print_strings.append(
+          u'{0:>15} : {1:s}{2:s}'.format(u'Hive File', hive_path, name))
+      if hive_path_spec:
+        current_hive = shell_helper.OpenHive(hive_path_spec, hive_collector)
+      else:
+        current_hive = shell_helper.OpenHive(hive_path, hive_collector)
+
+      if not current_hive:
+        continue
+
+      for key_path in key_paths:
+        key_texts = []
+        key_dict = {}
+        if current_hive.reg_cache:
+          key_dict.update(current_hive.reg_cache.attributes.items())
+
+        if PregCache.knowledge_base_object.pre_obj:
+          key_dict.update(
+              PregCache.knowledge_base_object.pre_obj.__dict__.items())
+
+        key = current_hive.GetKeyByPath(key_path)
+        key_texts.append(u'{0:>15} : {1:s}'.format(u'Key Name', key_path))
+        if not key:
+          key_texts.append(u'Unable to open key: {0:s}'.format(key_path))
+          if verbose:
+            print_strings.extend(key_texts)
+          continue
+        key_texts.append(
+            u'{0:>15} : {1:d}'.format(u'Subkeys', key.number_of_subkeys))
+        key_texts.append(u'{0:>15} : {1:d}'.format(
+            u'Values', key.number_of_values))
+        key_texts.append(u'')
+
+        if verbose:
+          key_texts.append(u'{0:-^80}'.format(u' SubKeys '))
+          for subkey in key.GetSubkeys():
+            key_texts.append(
+                u'{0:>15} : {1:s}'.format(u'Key Name', subkey.path))
+
+        key_texts.append(u'')
+        key_texts.append(u'{0:-^80}'.format(u' Plugins '))
+
+        output_string = ParseKey(
+            key=key, shell_helper=shell_helper, verbose=verbose,
+            use_plugins=use_plugins, hive_helper=current_hive)
+        key_texts.extend(output_string)
+
+        print_strings.extend(key_texts)
+
+    return u'\n'.join(print_strings)
+
+  def ParseOptions(self, options, source_option='source'):
+    """Parses the options and initializes the front-end.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      source_option: optional name of the source option. The default is
+          source.
+
+    Raises:
+      BadConfigOption: if the options are invalid.
+    """
+    if not options:
+      raise errors.BadConfigOption(u'Missing options.')
+
+    image = getattr(options, 'image', None)
+    regfile = getattr(options, 'regfile', None)
+
+    if not image and not regfile:
+      raise errors.BadConfigOption(u'Not enough parameters to proceed.')
+
+    if image:
+      self._source_path = image
+
+    if regfile:
+      if not image and not os.path.isfile(regfile):
+        raise errors.BadConfigOption(
+            u'Registry file: {0:s} does not exist.'.format(regfile))
+
+    self._key_path = getattr(options, 'key', None)
+    self._parse_restore_points = getattr(options, 'restore_points', False)
+
+    self._verbose_output = getattr(options, 'verbose', False)
+
+    self.plugins = parsers_manager.ParsersManager.GetWindowsRegistryPlugins()
+
+    if image:
+      file_to_check = image
+    else:
+      file_to_check = regfile
+
+    is_file, reason = PathExists(file_to_check)
+    if not is_file:
+      raise errors.BadConfigOption(
+          u'Unable to read the input file with error: {0:s}'.format(reason))
+
+    if getattr(options, 'console', False):
+      self.run_mode = self.RUN_MODE_CONSOLE
+    elif getattr(options, 'key', u'') and regfile:
+      self.run_mode = self.RUN_MODE_REG_KEY
+    elif getattr(options, 'plugin_names', u''):
+      self.run_mode = self.RUN_MODE_REG_PLUGIN
+    elif regfile:
+      self.run_mode = self.RUN_MODE_REG_FILE
+    else:
+      raise errors.BadConfigOption(
+          u'Incorrect usage. You\'ll need to define the path of either '
+          u'a storage media image or a Windows Registry file.')
+
+  def _ExpandKeysRedirect(self, keys):
+    """Expands a list of Registry key paths with their redirect equivalents.
+
+    Args:
+      keys: a list of Windows Registry key paths.
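+
+    For example (illustrative): the key path '\Software\Classes' also
+    yields the redirect '\Software\Wow6432Node\Classes'.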
+    """
+    for key in keys:
+      if key.startswith('\\Software') and 'Wow6432Node' not in key:
+        _, first, second = key.partition('\\Software')
+        keys.append(u'{0:s}\\Wow6432Node{1:s}'.format(first, second))
+
+  # TODO: clean up this function as part of dfvfs find integration.
+  # TODO: a duplicate of this function exists in class: WinRegistryPreprocess
+  # method: GetValue; merge them.
+  def _FindRegistryPaths(self, searcher, pattern):
+    """Return a list of Windows Registry file paths.
+
+    Args:
+      searcher: The file system searcher object (instance of
+          dfvfs.FileSystemSearcher).
+      pattern: The pattern to find.
+    """
+    # TODO: optimize this in one find.
+    hive_paths = []
+    file_path, _, file_name = pattern.rpartition(u'/')
+
+    # The path is split in segments to make it path segment separator
+    # independent (and thus platform independent).
+    path_segments = file_path.split(u'/')
+    if not path_segments[0]:
+      path_segments = path_segments[1:]
+
+    find_spec = file_system_searcher.FindSpec(
+        location_regex=path_segments, case_sensitive=False)
+    path_specs = list(searcher.Find(find_specs=[find_spec]))
+
+    if not path_specs:
+      logging.debug(u'Directory: {0:s} not found'.format(file_path))
+      return hive_paths
+
+    for path_spec in path_specs:
+      directory_location = getattr(path_spec, 'location', None)
+      if not directory_location:
+        raise errors.PreProcessFail(
+            u'Missing directory location for: {0:s}'.format(file_path))
+
+      # The path is split in segments to make it path segment separator
+      # independent (and thus platform independent).
+      path_segments = searcher.SplitPath(directory_location)
+      path_segments.append(file_name)
+
+      find_spec = file_system_searcher.FindSpec(
+          location_regex=path_segments, case_sensitive=False)
+      fh_path_specs = list(searcher.Find(find_specs=[find_spec]))
+
+      if not fh_path_specs:
+        logging.debug(u'File: {0:s} not found in directory: {1:s}'.format(
+            file_name, directory_location))
+        continue
+
+      hive_paths.extend(fh_path_specs)
+
+    return hive_paths
+
+  def _GetRegistryFilePaths(self, plugin_name=None, registry_type=None):
+    """Returns a list of Registry paths from a configuration object.
+
+    Args:
+      plugin_name: optional string containing the name of the plugin or an
+          empty string or None for all the types. Defaults to None.
+      registry_type: optional Registry type string. None by default.
+
+    Returns:
+      A list of path names for Registry files.
+    """
+    if self._parse_restore_points:
+      restore_path = u'/System Volume Information/_restor.+/RP[0-9]+/snapshot/'
+    else:
+      restore_path = u''
+
+    if registry_type:
+      types = [registry_type]
+    else:
+      types = self._GetRegistryTypes(plugin_name)
+
+    # Gather the Registry files to fetch.
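+    # For example (illustrative): the NTUSER type yields the regular
+    # expression '/Users/.+/NTUSER.DAT', and '{sysregistry}' is a
+    # placeholder that the path expander below resolves to the system
+    # Registry directory.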
+    paths = []
+
+    for reg_type in types:
+      if reg_type == 'NTUSER':
+        paths.append('/Documents And Settings/.+/NTUSER.DAT')
+        paths.append('/Users/.+/NTUSER.DAT')
+        if restore_path:
+          paths.append('{0:s}/_REGISTRY_USER_NTUSER.+'.format(restore_path))
+
+      elif reg_type == 'SOFTWARE':
+        paths.append('{sysregistry}/SOFTWARE')
+        if restore_path:
+          paths.append('{0:s}/_REGISTRY_MACHINE_SOFTWARE'.format(restore_path))
+
+      elif reg_type == 'SYSTEM':
+        paths.append('{sysregistry}/SYSTEM')
+        if restore_path:
+          paths.append('{0:s}/_REGISTRY_MACHINE_SYSTEM'.format(restore_path))
+
+      elif reg_type == 'SECURITY':
+        paths.append('{sysregistry}/SECURITY')
+        if restore_path:
+          paths.append('{0:s}/_REGISTRY_MACHINE_SECURITY'.format(restore_path))
+
+      elif reg_type == 'USRCLASS':
+        paths.append('/Users/.+/AppData/Local/Microsoft/Windows/UsrClass.dat')
+
+      elif reg_type == 'SAM':
+        paths.append('{sysregistry}/SAM')
+        if restore_path:
+          paths.append('{0:s}/_REGISTRY_MACHINE_SAM'.format(restore_path))
+
+    # Expand all the paths.
+    expanded_paths = []
+    expander = winreg_path_expander.WinRegistryKeyPathExpander()
+    for path in paths:
+      try:
+        expanded_paths.append(expander.ExpandPath(
+            path, pre_obj=PregCache.knowledge_base_object.pre_obj))
+
+      except KeyError as exception:
+        logging.error(u'Unable to expand keys with error: {0:s}'.format(
+            exception))
+
+    return expanded_paths
+
+  def _GetRegistryKeysFromHive(self, hive_helper, parser_context):
+    """Retrieves all Registry keys of the key plugins for a hive's type.
+
+    Args:
+      hive_helper: A hive object (instance of PregHiveHelper).
+      parser_context: A parser context object (instance of ParserContext).
+
+    Returns:
+      A list of Windows Registry keys.
+    """
+    keys = []
+    if not hive_helper:
+      return keys
+    for key_plugin_cls in self.plugins.GetAllKeyPlugins():
+      temp_obj = key_plugin_cls(reg_cache=hive_helper.reg_cache)
+      if temp_obj.REG_TYPE == hive_helper.type:
+        temp_obj.ExpandKeys(parser_context)
+        keys.extend(temp_obj.expanded_keys)
+
+    return keys
+
+  def _GetRegistryPlugins(self, plugin_name):
+    """Retrieves the Windows Registry plugins based on a filter string.
+
+    Args:
+      plugin_name: string containing the name of the plugin or an empty
+          string for all the plugins.
+
+    Returns:
+      A list of Windows Registry plugins.
+    """
+    key_plugin_names = []
+    for plugin in self.plugins.GetAllKeyPlugins():
+      temp_obj = plugin(None)
+      key_plugin_names.append(temp_obj.plugin_name)
+
+    if not plugin_name:
+      return key_plugin_names
+
+    plugin_name = plugin_name.lower()
+    if not plugin_name.startswith('winreg'):
+      plugin_name = u'winreg_{0:s}'.format(plugin_name)
+
+    plugins_to_run = []
+    for key_plugin in key_plugin_names:
+      if plugin_name in key_plugin.lower():
+        plugins_to_run.append(key_plugin)
+
+    return plugins_to_run
+
+  def _GetRegistryTypes(self, plugin_name):
+    """Retrieves the Windows Registry types based on a filter string.
+
+    Args:
+      plugin_name: string containing the name of the plugin or an empty
+          string for all the types.
+
+    Returns:
+      A list of Windows Registry types.
+    """
+    reg_cache = cache.WinRegistryCache()
+    types = []
+    for plugin in self._GetRegistryPlugins(plugin_name):
+      for key_plugin_cls in self.plugins.GetAllKeyPlugins():
+        temp_obj = key_plugin_cls(reg_cache=reg_cache)
+        if plugin == temp_obj.plugin_name:
+          if temp_obj.REG_TYPE not in types:
+            types.append(temp_obj.REG_TYPE)
+          break
+
+    return types
+
+  def _GetSearchersForImage(self, volume_path_spec):
+    """Retrieves the file system searchers for searching the image.
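+
+    One searcher is created for the file system of the volume itself and one
+    additional searcher for every selected VSS store.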
+
+    Args:
+      volume_path_spec: The path specification of the volume containing
+          the file system (instance of dfvfs.PathSpec).
+
+    Returns:
+      A list of tuples containing a string identifying the file system
+      searcher and a file system searcher object (instance of
+      dfvfs.FileSystemSearcher).
+    """
+    searchers = []
+
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
+        parent=volume_path_spec)
+
+    file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
+    searcher = file_system_searcher.FileSystemSearcher(
+        file_system, volume_path_spec)
+
+    searchers.append((u'', searcher))
+
+    vss_stores = self._vss_stores
+
+    if not vss_stores:
+      return searchers
+
+    for store_index in vss_stores:
+      vss_path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_VSHADOW,
+          store_index=store_index - 1, parent=volume_path_spec)
+      path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
+          parent=vss_path_spec)
+
+      file_system = path_spec_resolver.Resolver.OpenFileSystem(path_spec)
+      searcher = file_system_searcher.FileSystemSearcher(
+          file_system, vss_path_spec)
+
+      searchers.append((
+          u':VSS Store {0:d}'.format(store_index), searcher))
+
+    return searchers
+
+  def GetHivesAndCollectors(
+      self, options, registry_types=None, plugin_names=None):
+    """Returns a list of discovered Registry hives and collectors.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      registry_types: an optional list of Registry types, e.g. NTUSER, SAM,
+          etc. that should be included. Defaults to None.
+      plugin_names: an optional list of strings containing the name of the
+          plugin(s) or an empty string for all the types. Defaults to None.
+
+    Returns:
+      A tuple of hives and searchers, where hives is a list that contains
+      either a string (location of a Registry hive) or path specs (instance
+      of dfvfs.path.path_spec.PathSpec). The searchers is a list of tuples
+      that contain the name of the searcher and a searcher object (instance
+      of dfvfs.helpers.file_system_searcher.FileSystemSearcher) or None (if
+      no searcher is required).
+
+    Raises:
+      ValueError: If neither registry_types nor plugin_names is passed
+          as a parameter.
+      BadConfigOption: If the source scanner is unable to complete due to
+          a source scanner error or back end error in dfvfs.
+    """
+    if registry_types is None and plugin_names is None:
+      raise ValueError(
+          u'Missing registry_types or plugin_names value.')
+
+    if plugin_names is None:
+      plugin_names = []
+    else:
+      plugin_names = [plugin_name.lower() for plugin_name in plugin_names]
+
+    # TODO: use non-preprocess collector with filter to collect hives.
+
+    # TODO: rewrite to always use collector or equiv.
+    if not self._source_path:
+      searchers = [(u'', None)]
+      return registry_types, searchers
+
+    try:
+      self.ScanSource(options)
+    except errors.SourceScannerError as exception:
+      raise errors.BadConfigOption((
+          u'Unable to scan for a supported filesystem with error: {0:s}\n'
+          u'Most likely the image format is not supported by the '
+          u'tool.').format(exception))
+
+    searchers = self._GetSearchersForImage(self.GetSourcePathSpec().parent)
+    _, searcher = searchers[0]
+
+    # Run preprocessing on image.
+    platform = preprocess_interface.GuessOS(searcher)
+
+    preprocess_manager.PreprocessPluginsManager.RunPlugins(
+        platform, searcher, PregCache.knowledge_base_object)
+
+    # Create the keyword list if plugins are used.
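+    # For example (illustrative): asking for the 'userassist' plugin pulls
+    # in the NTUSER Registry type, while a plugin with REG_TYPE 'any'
+    # causes every known hive type except 'Unknown' to be included.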
+    plugins_list = parsers_manager.ParsersManager.GetWindowsRegistryPlugins()
+    if plugin_names:
+      if registry_types is None:
+        registry_types = []
+      for plugin_name in plugin_names:
+        if not plugin_name.startswith('winreg_'):
+          plugin_name = u'winreg_{0:s}'.format(plugin_name)
+
+        for plugin_cls in plugins_list.GetAllKeyPlugins():
+          if plugin_name == plugin_cls.NAME.lower():
+            # If a plugin is available for every Registry type
+            # we need to make sure all Registry hives are included.
+            if plugin_cls.REG_TYPE == u'any':
+              for available_type in PregHiveHelper.REG_TYPES.iterkeys():
+                if available_type == u'Unknown':
+                  continue
+
+                if available_type not in registry_types:
+                  registry_types.append(available_type)
+
+            if plugin_cls.REG_TYPE not in registry_types:
+              registry_types.append(plugin_cls.REG_TYPE)
+
+    # Find all the Registry paths we need to check.
+    paths = []
+    if registry_types:
+      for registry_type in registry_types:
+        paths.extend(self._GetRegistryFilePaths(
+            registry_type=registry_type.upper()))
+    else:
+      for plugin_name in plugin_names:
+        paths.extend(self._GetRegistryFilePaths(plugin_name=plugin_name))
+
+    hives = []
+    for path in paths:
+      hives.extend(self._FindRegistryPaths(searcher, path))
+
+    return hives, searchers
+
+  def RunModeRegistryKey(self, options, plugin_names):
+    """Run against a specific Registry key.
+
+    Finds and opens all Registry hives as configured in the configuration
+    object and tries to open the Registry key that is stored in the
+    configuration object for every detected hive file and parses it using
+    all available plugins.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      plugin_names: a list of strings containing the name of the plugin(s)
+          or an empty list for all the types.
+    """
+    regfile = getattr(options, 'regfile', u'')
+
+    hives, hive_collectors = self.GetHivesAndCollectors(
+        options, registry_types=[regfile],
+        plugin_names=plugin_names)
+
+    key_paths = [self._key_path]
+
+    # Expand the key paths if there is a need (due to Windows redirect).
+    self._ExpandKeysRedirect(key_paths)
+
+    hive_storage = PregStorage()
+    shell_helper = PregHelper(options, self, hive_storage)
+
+    if hives is None:
+      hives = [regfile]
+
+    for hive in hives:
+      output_string = self.ParseHive(
+          hive, hive_collectors, shell_helper,
+          key_paths=key_paths, verbose=self._verbose_output)
+      self._output_writer.Write(output_string)
+
+  def RunModeRegistryPlugin(self, options, plugin_names):
+    """Run against a set of Registry plugins.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      plugin_names: a list of strings containing the name of the plugin(s)
+          or an empty string for all the types.
+    """
+    # TODO: Add support for splitting the output to separate files based on
+    # each plugin name.
+    hives, hive_collectors = self.GetHivesAndCollectors(
+        options, plugin_names=plugin_names)
+
+    if hives is None:
+      hives = [getattr(options, 'regfile', None)]
+
+    plugin_list = []
+    for plugin_name in plugin_names:
+      plugin_list.extend(self._GetRegistryPlugins(plugin_name))
+
+    # In order to get all the Registry keys we need to expand
+    # them, but to do so we need to open up one hive so that we
+    # create the reg_cache object, which is necessary to fully
+    # expand all keys.
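+    # For example (illustrative): a plugin key path such as
+    # '\{current_control_set}\Services' can only be expanded after a
+    # SYSTEM hive has been opened and its Registry cache has been built.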
+    _, hive_collector = hive_collectors[0]
+    hive_storage = PregStorage()
+    shell_helper = PregHelper(options, self, hive_storage)
+    hive_helper = shell_helper.OpenHive(hives[0], hive_collector)
+    parser_context = shell_helper.BuildParserContext()
+
+    # Get all the appropriate keys from these plugins.
+    key_paths = self.plugins.GetExpandedKeyPaths(
+        parser_context, reg_cache=hive_helper.reg_cache,
+        plugin_names=plugin_list)
+
+    for hive in hives:
+      output_string = self.ParseHive(
+          hive, hive_collectors, shell_helper,
+          key_paths=key_paths, use_plugins=plugin_list,
+          verbose=self._verbose_output)
+      self._output_writer.Write(output_string)
+
+  def RunModeRegistryFile(self, options, regfile):
+    """Run against a Registry file.
+
+    Finds and opens all Registry hives as configured in the configuration
+    object and determines the type of Registry file opened. Then it will
+    load up all the Registry plugins suitable for that particular Registry
+    file, find all Registry keys they are able to parse and run through
+    them, one by one.
+
+    Args:
+      options: the command line arguments (instance of argparse.Namespace).
+      regfile: A string containing either the full path to the Registry
+          hive or a keyword to match it.
+    """
+    # Get all the hives and collectors.
+    hives, hive_collectors = self.GetHivesAndCollectors(
+        options, registry_types=[regfile])
+
+    hive_storage = PregStorage()
+    shell_helper = PregHelper(options, self, hive_storage)
+    parser_context = shell_helper.BuildParserContext()
+
+    for hive in hives:
+      for collector_name, hive_collector in hive_collectors:
+        hive_helper = shell_helper.OpenHive(
+            hive, hive_collector=hive_collector,
+            hive_collector_name=collector_name)
+        hive_type = hive_helper.type
+
+        key_paths = self._GetRegistryKeysFromHive(hive_helper, parser_context)
+        self._ExpandKeysRedirect(key_paths)
+
+        plugins_to_run = self._GetRegistryPlugins(hive_type)
+        output_string = self.ParseHive(
+            hive, hive_collectors, shell_helper, key_paths=key_paths,
+            use_plugins=plugins_to_run, verbose=self._verbose_output)
+        self._output_writer.Write(output_string)
+
+
+class PregHelper(object):
+  """Class that defines various helper functions.
+
+  The purpose of this class is to bridge the plaso generated objects
+  with the IPython objects, making it easier to create magic classes
+  and provide additional helper functions to the IPython shell.
+  """
+
+  def __init__(self, tool_options, tool_front_end, hive_storage):
+    """Initialize the helper object.
+
+    Args:
+      tool_options: A configuration object.
+      tool_front_end: A front end object (instance of PregFrontend).
+      hive_storage: A hive storage object (instance of PregStorage).
+    """
+    super(PregHelper, self).__init__()
+    self.tool_options = tool_options
+    self.tool_front_end = tool_front_end
+    self.hive_storage = hive_storage
+
+  def BuildParserContext(self, event_queue=None):
+    """Build the parser context object.
+
+    Args:
+      event_queue: An event queue object (instance of Queue). This is
+          optional and if a queue is not provided a default one will be
+          provided.
+
+    Returns:
+      A parser context object (instance of parsers_context.ParserContext).
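+
+    The context is wired to item producers for the event queue and the
+    parse error queue and to the shared PregCache knowledge base.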
+    """
+    if event_queue is None:
+      event_queue = single_process.SingleProcessQueue()
+    event_queue_producer = queue.ItemQueueProducer(event_queue)
+
+    parse_error_queue = single_process.SingleProcessQueue()
+    parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue)
+
+    return parsers_context.ParserContext(
+        event_queue_producer, parse_error_queue_producer,
+        PregCache.knowledge_base_object)
+
+  def OpenHive(
+      self, filename_or_path_spec, hive_collector, hive_collector_name=None,
+      codepage='cp1252'):
+    """Open a Registry hive based on a collector or a filename.
+
+    Args:
+      filename_or_path_spec: file path to the hive as a string or a path
+          spec object (instance of dfvfs.path.path_spec.PathSpec).
+      hive_collector: the collector to use (instance of
+          dfvfs.helpers.file_system_searcher.FileSystemSearcher).
+      hive_collector_name: optional string denoting the name of the
+          collector used. The default value is None.
+      codepage: the default codepage, default is cp1252.
+
+    Returns:
+      A hive helper object (instance of PregHiveHelper).
+    """
+    PregCache.knowledge_base_object.SetDefaultCodepage(codepage)
+
+    if isinstance(filename_or_path_spec, basestring):
+      filename = filename_or_path_spec
+      path_spec = None
+    else:
+      filename = filename_or_path_spec.location
+      path_spec = filename_or_path_spec
+
+    if not hive_collector:
+      path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_OS, location=filename)
+      file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+    else:
+      file_entry = hive_collector.GetFileEntryByPathSpec(path_spec)
+
+    win_registry = winregistry.WinRegistry(
+        winregistry.WinRegistry.BACKEND_PYREGF)
+
+    try:
+      hive_object = win_registry.OpenFile(
+          file_entry, codepage=PregCache.knowledge_base_object.codepage)
+    except IOError:
+      if filename is not None:
+        filename_string = filename
+      elif path_spec:
+        filename_string = path_spec.location
+      else:
+        filename_string = u'unknown file path'
+      logging.error(
+          u'Unable to open Registry hive: {0:s} [{1:s}]'.format(
+              filename_string, hive_collector_name))
+      return
+
+    return PregHiveHelper(
+        hive_object, file_entry=file_entry, collector_name=hive_collector_name)
+
+  def Scan(self, registry_types):
+    """Scan for available hives using keywords.
+
+    Args:
+      registry_types: A list of keywords to scan for, e.g.: "NTUSER",
+          "SOFTWARE", etc.
+    """
+    if not registry_types:
+      print (
+          u'Unable to scan for an empty keyword. Please specify a keyword, '
+          u'e.g.: NTUSER, SOFTWARE, etc.')
+      return
+
+    hives, collectors = self.tool_front_end.GetHivesAndCollectors(
+        self.tool_options, registry_types=registry_types)
+
+    if not hives:
+      print u'No newly discovered hives.'
+      return
+
+    if type(hives) in (list, tuple):
+      for hive in hives:
+        for name, collector in collectors:
+          hive_helper = self.OpenHive(
+              hive, hive_collector=collector, hive_collector_name=name)
+          if hive_helper:
+            self.hive_storage.AppendHive(hive_helper)
+    else:
+      for name, collector in collectors:
+        hive_helper = self.OpenHive(
+            hives, hive_collector=collector, hive_collector_name=name)
+        if hive_helper:
+          self.hive_storage.AppendHive(hive_helper)
+
+
+class PregHiveHelper(object):
+  """Class that defines a few helper functions for Registry operations."""
+
+  _currently_loaded_registry_key = ''
+  _hive = None
+  _hive_type = u'UNKNOWN'
+
+  collector_name = None
+  file_entry = None
+  path_expander = None
+  reg_cache = None
+
+  REG_TYPES = {
+      u'NTUSER': ('\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer',),
+      u'SOFTWARE': ('\\Microsoft\\Windows\\CurrentVersion\\App Paths',),
+      u'SECURITY': ('\\Policy\\PolAdtEv',),
+      u'SYSTEM': ('\\Select',),
+      u'SAM': ('\\SAM\\Domains\\Account\\Users',),
+      u'USRCLASS': (
+          '\\Local Settings\\Software\\Microsoft\\Windows\\CurrentVersion',),
+      u'UNKNOWN': (),
+  }
+
+  @property
+  def name(self):
+    """Return the name of the hive."""
+    return getattr(self._hive, 'name', u'N/A')
+
+  @property
+  def path(self):
+    """Return the file path of the hive."""
+    path_spec = getattr(self.file_entry, 'path_spec', None)
+    if not path_spec:
+      return u'N/A'
+
+    return getattr(path_spec, 'location', u'N/A')
+
+  @property
+  def root_key(self):
+    """Return the root key of the Registry hive."""
+    return self._hive.GetKeyByPath(u'\\')
+
+  @property
+  def type(self):
+    """Return the hive type."""
+    return self._hive_type
+
+  def __init__(self, hive, file_entry, collector_name):
+    """Initialize the Registry hive helper.
+
+    Args:
+      hive: A hive object (instance of WinPyregfFile).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      collector_name: Name of the collector used as a string.
+    """
+    self._hive = hive
+    self.file_entry = file_entry
+    self.collector_name = collector_name
+
+    # Determine type and build cache.
+    self._SetHiveType()
+    self._BuildHiveCache()
+
+    # Initialize the hive to the root key.
+    _ = self.GetKeyByPath(u'\\')
+
+  def _BuildHiveCache(self):
+    """Calculate the Registry cache."""
+    self.reg_cache = cache.WinRegistryCache()
+    self.reg_cache.BuildCache(self._hive, self._hive_type)
+    self.path_expander = winreg_path_expander.WinRegistryKeyPathExpander(
+        reg_cache=self.reg_cache)
+
+  def _SetHiveType(self):
+    """Detect and set the hive type."""
+    get_key_by_path = self._hive.GetKeyByPath
+    for reg_type in self.REG_TYPES:
+      if reg_type == u'UNKNOWN':
+        continue
+
+      # For a hive to be considered a specific type all of the keys need to
+      # be found.
+      found = True
+      for reg_key in self.REG_TYPES[reg_type]:
+        if not get_key_by_path(reg_key):
+          found = False
+          break
+
+      if found:
+        self._hive_type = reg_type
+        return
+
+  def GetCurrentRegistryKey(self):
+    """Return the currently loaded Registry key."""
+    return self._currently_loaded_registry_key
+
+  def GetCurrentRegistryPath(self):
+    """Return the loaded Registry key path or None if no key is loaded."""
+    key = self._currently_loaded_registry_key
+    if not key:
+      return
+
+    return key.path
+
+  def GetKeyByPath(self, path):
+    """Retrieves a specific key defined by the Registry path.
+
+    Args:
+      path: the Registry path.
+
+    Returns:
+      The key (instance of WinRegKey) if available or None otherwise.
+    """
+    if not path:
+      return
+
+    key = self._hive.GetKeyByPath(path)
+    if not key:
+      return
+
+    self._currently_loaded_registry_key = key
+    return key
+
+
+class PregStorage(object):
+  """Class for storing discovered hives."""
+
+  # Index number of the currently loaded Registry hive.
+  _current_index = -1
+  _currently_loaded_hive = None
+
+  _hive_list = []
+
+  @property
+  def loaded_hive(self):
+    """Return the currently loaded hive or None if no hive is loaded."""
+    if not self._currently_loaded_hive:
+      return
+
+    return self._currently_loaded_hive
+
+  def __len__(self):
+    """Return the number of available hives."""
+    return len(self._hive_list)
+
+  def AppendHive(self, hive_helper):
+    """Append a hive object to the Registry hive storage.
+
+    Args:
+      hive_helper: A hive object (instance of PregHiveHelper).
+    """
+    self._hive_list.append(hive_helper)
+
+  def AppendHives(self, hive_helpers):
+    """Append hives to the Registry hive storage.
+
+    Args:
+      hive_helpers: A list of hive objects (instance of PregHiveHelper).
+    """
+    if type(hive_helpers) not in (list, tuple):
+      hive_helpers = [hive_helpers]
+
+    self._hive_list.extend(hive_helpers)
+
+  def ListHives(self):
+    """Return a string with a list of all available hives and collectors.
+
+    Returns:
+      A string with a list of all available hives and collectors. If there
+      are no loaded hives None will be returned.
+    """
+    if not self._hive_list:
+      return
+
+    return_strings = [u'Index Hive [collector]']
+    for index, hive in enumerate(self._hive_list):
+      collector = hive.collector_name
+      if not collector:
+        collector = u'Currently Allocated'
+
+      if self._current_index == index:
+        star = u'*'
+      else:
+        star = u''
+      return_strings.append(u'{0:<5d} {1:s}{2:s} [{3:s}]'.format(
+          index, star, hive.path, collector))
+
+    return u'\n'.join(return_strings)
+
+  def SetOpenHive(self, hive_index):
+    """Set the current open hive.
+
+    Args:
+      hive_index: An index (integer) into the hive list.
+    """
+    if not self._hive_list:
+      return
+
+    index = hive_index
+    if isinstance(hive_index, basestring):
+      try:
+        index = int(hive_index, 10)
+      except ValueError:
+        print u'Wrong hive index, value should be decimal.'
+        return
+
+    try:
+      hive_helper = self._hive_list[index]
+    except IndexError:
+      print u'Hive not found, index out of range?'
+      return
+
+    self._current_index = index
+    self._currently_loaded_hive = hive_helper
+
+
+def CdCompleter(unused_self, unused_event):
+  """Completer function for the cd command, returning sub keys."""
+  return_list = []
+  current_hive = PregCache.hive_storage.loaded_hive
+  current_key = current_hive.GetCurrentRegistryKey()
+  for key in current_key.GetSubkeys():
+    return_list.append(key.name)
+
+  return return_list
+
+
+def PluginCompleter(unused_self, event_object):
+  """Completer function that returns a list of available plugins."""
+  ret_list = []
+
+  if not IsLoaded():
+    return ret_list
+
+  if '-h' not in event_object.line:
+    ret_list.append('-h')
+
+  plugins_list = parsers_manager.ParsersManager.GetWindowsRegistryPlugins()
+
+  current_hive = PregCache.hive_storage.loaded_hive
+  hive_type = current_hive.type
+
+  for plugin_cls in plugins_list.GetKeyPlugins(hive_type):
+    plugin_obj = plugin_cls(reg_cache=current_hive.reg_cache)
+
+    plugin_name = plugin_obj.plugin_name
+    if plugin_name.startswith('winreg'):
+      plugin_name = plugin_name[PregFrontend.PLUGIN_UNIQUE_NAME_START:]
+
+    if plugin_name == 'default':
+      continue
+    ret_list.append(plugin_name)
+
+  return ret_list
+
+
+def VerboseCompleter(unused_self, event_object):
+  """Completer function that suggests simple verbose settings."""
+  if '-v' in event_object.line:
+    return []
+  else:
+    return ['-v']
+
+
+@magic.magics_class
+class MyMagics(magic.Magics):
+  """A simple class holding all magic functions for the console."""
+
+  EXPANSION_KEY_OPEN = r'{'
+  EXPANSION_KEY_CLOSE = r'}'
+
+  # Match against one instance, not two of the expansion key.
+  EXPANSION_RE = re.compile(r'{0:s}{{1}}[^{1:s}]+?{1:s}'.format(
+      EXPANSION_KEY_OPEN, EXPANSION_KEY_CLOSE))
+
+  output_writer = sys.stdout
+
+  @magic.line_magic('cd')
+  def ChangeDirectory(self, key):
+    """Change between Registry keys, like a directory tree.
+
+    The key path can either be an absolute path or a relative one.
+    Absolute paths can use '.' and '..' to denote current and parent
+    directory/key path.
+
+    Args:
+      key: The path to the key to traverse to.
+    """
+    registry_key = None
+    key_path = key
+
+    if not key:
+      self.ChangeDirectory('\\')
+
+    loaded_hive = PregCache.hive_storage.loaded_hive
+
+    if not loaded_hive:
+      return
+
+    # Check if we need to expand environment attributes.
+    match = self.EXPANSION_RE.search(key)
+    if match and u'{0:s}{0:s}'.format(
+        self.EXPANSION_KEY_OPEN) not in match.group(0):
+      try:
+        key = loaded_hive.path_expander.ExpandPath(
+            key, pre_obj=PregCache.knowledge_base_object.pre_obj)
+      except (KeyError, IndexError):
+        pass
+
+    if key.startswith(u'\\'):
+      registry_key = loaded_hive.GetKeyByPath(key)
+    elif key == '.':
+      return
+    elif key.startswith(u'.\\'):
+      current_path = loaded_hive.GetCurrentRegistryPath()
+      _, _, key_path = key.partition(u'\\')
+      registry_key = loaded_hive.GetKeyByPath(u'{0:s}\\{1:s}'.format(
+          current_path, key_path))
+    elif key.startswith(u'..'):
+      parent_path, _, _ = loaded_hive.GetCurrentRegistryPath().rpartition(
+          u'\\')
+      # We know the path starts with a "..".
+      if len(key) == 2:
+        key_path = u''
+      else:
+        key_path = key[3:]
+      if parent_path:
+        if key_path:
+          path = u'{0:s}\\{1:s}'.format(parent_path, key_path)
+        else:
+          path = parent_path
+        registry_key = loaded_hive.GetKeyByPath(path)
+      else:
+        registry_key = loaded_hive.GetKeyByPath(u'\\{0:s}'.format(key_path))
+
+    else:
+      # Check if key is not set at all, then assume traversal from root.
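+      # For example (illustrative): with no key loaded, "cd Software" is
+      # equivalent to "cd \Software"; otherwise "Software" is appended to
+      # the current key path.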
+      if not loaded_hive.GetCurrentRegistryPath():
+        _ = loaded_hive.GetKeyByPath(u'\\')
+
+      current_key = loaded_hive.GetCurrentRegistryKey()
+      if current_key.name == loaded_hive.root_key.name:
+        key_path = u'\\{0:s}'.format(key)
+      else:
+        key_path = u'{0:s}\\{1:s}'.format(current_key.path, key)
+      registry_key = loaded_hive.GetKeyByPath(key_path)
+
+    if registry_key:
+      if key_path == '\\':
+        path = '\\'
+      else:
+        path = registry_key.path
+
+      ConsoleConfig.SetPrompt(
+          hive_path=loaded_hive.path,
+          prepend_string=StripCurlyBrace(path).replace('\\', '\\\\'))
+    else:
+      print u'Unable to change to: {0:s}'.format(key_path)
+
+  @magic.line_magic('hive')
+  def HiveActions(self, line):
+    """Define the hive command on the console prompt."""
+    if line.startswith('list'):
+      print PregCache.hive_storage.ListHives()
+
+      print u''
+      print u'To open a hive, use: hive open INDEX'
+    elif line.startswith('open ') or line.startswith('load '):
+      PregCache.hive_storage.SetOpenHive(line[5:])
+      hive_helper = PregCache.hive_storage.loaded_hive
+      print u'Opening hive: {0:s} [{1:s}]'.format(
+          hive_helper.path, hive_helper.collector_name)
+      ConsoleConfig.SetPrompt(hive_path=hive_helper.path)
+    elif line.startswith('scan'):
+      items = line.split()
+      if len(items) < 2:
+        print (
+            u'Unable to scan for an empty keyword. Please specify a keyword, '
+            u'e.g. NTUSER, SOFTWARE, etc.')
+        return
+
+      PregCache.hive_storage.Scan(items[1:])
+
+  @magic.line_magic('ls')
+  def ListDirectoryContent(self, line):
+    """List all subkeys and values of the current key."""
+    if not IsLoaded():
+      return
+
+    verbose = 'true' in line.lower() or '-v' in line.lower()
+
+    sub = []
+    current_hive = PregCache.hive_storage.loaded_hive
+    if not current_hive:
+      return
+
+    current_key = current_hive.GetCurrentRegistryKey()
+    for key in current_key.GetSubkeys():
+      # TODO: move this construction into a separate function in OutputWriter.
+      timestamp, _, _ = frontend_utils.OutputWriter.GetDateTimeString(
+          key.last_written_timestamp).partition('.')
+
+      sub.append((u'{0:>19s} {1:>15s} {2:s}'.format(
+          timestamp.replace('T', ' '), '[KEY]',
+          key.name), True))
+
+    for value in current_key.GetValues():
+      if not verbose:
+        sub.append((u'{0:>19s} {1:>14s}] {2:s}'.format(
+            u'', '[' + value.data_type_string, value.name), False))
+      else:
+        if value.DataIsString():
+          value_string = u'{0:s}'.format(value.data)
+        elif value.DataIsInteger():
+          value_string = u'{0:d}'.format(value.data)
+        elif value.DataIsMultiString():
+          value_string = u'{0:s}'.format(u''.join(value.data))
+        elif value.DataIsBinaryData():
+          hex_string = binascii.hexlify(value.data)
+          # We'll just print the first few bytes, but we need to pad them
+          # to make it fit in a single line if shorter.
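+          # A hex dump line covers 16 bytes, that is 32 hexadecimal
+          # characters. For example 8 bytes of data yield 16 hexadecimal
+          # characters and need 16 characters of padding to fill the line.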
+          if len(hex_string) % 32:
+            hex_string += u' ' * (32 - (len(hex_string) % 32))
+
+          value_string = frontend_utils.OutputWriter.GetHexDumpLine(
+              hex_string, 0)
+        else:
+          value_string = u''
+
+        sub.append((
+            u'{0:>19s} {1:>14s}] {2:<25s} {3:s}'.format(
+                u'', '[' + value.data_type_string, value.name, value_string),
+            False))
+
+    for entry, subkey in sorted(sub):
+      if subkey:
+        self.output_writer.write(u'dr-xr-xr-x {0:s}\n'.format(entry))
+      else:
+        self.output_writer.write(u'-r-xr-xr-x {0:s}\n'.format(entry))
+
+  @magic.line_magic('parse')
+  def ParseCurrentKey(self, line):
+    """Parse the current key."""
+    verbose = 'true' in line.lower() or '-v' in line.lower()
+
+    if not IsLoaded():
+      return
+
+    current_hive = PregCache.hive_storage.loaded_hive
+    if not current_hive:
+      return
+
+    # Clear the last results from parse key.
+    PregCache.events_from_last_parse = []
+
+    print_strings = ParseKey(
+        key=current_hive.GetCurrentRegistryKey(), hive_helper=current_hive,
+        shell_helper=PregCache.shell_helper, verbose=verbose)
+    self.output_writer.write(u'\n'.join(print_strings))
+
+    # Print out a hex dump of all binary values.
+    if verbose:
+      header_shown = False
+      for value in current_hive.GetCurrentRegistryKey().GetValues():
+        if value.DataIsBinaryData():
+          if not header_shown:
+            header_shown = True
+            print frontend_utils.FormatHeader('Hex Dump')
+          # Print '-' 80 times.
+          self.output_writer.write(u'-' * 80)
+          self.output_writer.write(u'\n')
+          self.output_writer.write(
+              frontend_utils.FormatOutputString('Attribute', value.name))
+          self.output_writer.write(u'-' * 80)
+          self.output_writer.write(u'\n')
+          self.output_writer.write(
+              frontend_utils.OutputWriter.GetHexDump(value.data))
+          self.output_writer.write(u'\n')
+          self.output_writer.write(u'+-' * 40)
+          self.output_writer.write(u'\n')
+
+    self.output_writer.flush()
+
+  @magic.line_magic('plugin')
+  def ParseWithPlugin(self, line):
+    """Parse a Registry key using a specific plugin."""
+    if not IsLoaded():
+      print u'No hive loaded, unable to parse.'
+      return
+
+    current_hive = PregCache.hive_storage.loaded_hive
+    if not current_hive:
+      return
+
+    if not line:
+      print u'No plugin name provided.'
+      return
+
+    plugin_name = line
+    if '-h' in line:
+      items = line.split()
+      if len(items) != 2:
+        print u'Wrong usage: plugin [-h] PluginName'
+        return
+      if items[0] == '-h':
+        plugin_name = items[1]
+      else:
+        plugin_name = items[0]
+
+    if not plugin_name.startswith('winreg'):
+      plugin_name = u'winreg_{0:s}'.format(plugin_name)
+
+    hive_type = current_hive.type
+    plugins_list = parsers_manager.ParsersManager.GetWindowsRegistryPlugins()
+    plugin_found = False
+    for plugin_cls in plugins_list.GetKeyPlugins(hive_type):
+      plugin = plugin_cls(reg_cache=current_hive.reg_cache)
+      if plugin.plugin_name == plugin_name:
+        # We found the correct plugin.
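+        # For example a user entered name such as 'userassist' was
+        # normalized above to 'winreg_userassist' before being compared
+        # against each plugin's plugin_name.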
+        plugin_found = True
+        break
+
+    if not plugin_found:
+      print u'No plugin named: {0:s} available for Registry type {1:s}'.format(
+          plugin_name, hive_type)
+      return
+
+    if not hasattr(plugin, 'REG_KEYS'):
+      print u'Plugin: {0:s} has no key information.'.format(plugin_name)
+      return
+
+    if '-h' in line:
+      print frontend_utils.FormatHeader(plugin_name)
+      print frontend_utils.FormatOutputString('Description', plugin.__doc__)
+      print u''
+      for registry_key in plugin.expanded_keys:
+        print frontend_utils.FormatOutputString('Registry Key', registry_key)
+      return
+
+    if not plugin.expanded_keys:
+      plugin.ExpandKeys(PregCache.parser_context)
+
+    # Clear the last results from parse key.
+    PregCache.events_from_last_parse = []
+
+    # Defining outside of for loop for optimization.
+    get_key_by_path = current_hive.GetKeyByPath
+    for registry_key in plugin.expanded_keys:
+      key = get_key_by_path(registry_key)
+      if not key:
+        print u'Key: {0:s} not found'.format(registry_key)
+        continue
+
+      # Move the current location to the key to be parsed.
+      self.ChangeDirectory(registry_key)
+      # Parse the key.
+      print_strings = ParseKey(
+          key=current_hive.GetCurrentRegistryKey(), hive_helper=current_hive,
+          shell_helper=PregCache.shell_helper, verbose=False,
+          use_plugins=[plugin_name])
+      self.output_writer.write(u'\n'.join(print_strings))
+      self.output_writer.flush()
+
+  @magic.line_magic('pwd')
+  def PrintCurrentWorkingDirectory(self, unused_line):
+    """Print the current path."""
+    if not IsLoaded():
+      return
+
+    current_hive = PregCache.hive_storage.loaded_hive
+    if not current_hive:
+      return
+
+    self.output_writer.write(u'{0:s}\n'.format(
+        current_hive.GetCurrentRegistryPath()))
+
+  @magic.line_magic('redirect_output')
+  def RedirectOutput(self, output_object):
+    """Change the output writer to redirect plugin output to a file."""
+    if isinstance(output_object, basestring):
+      output_object = open(output_object, 'wb')
+
+    if hasattr(output_object, 'write'):
+      self.output_writer = output_object
+
+
+def StripCurlyBrace(string):
+  """Return a format "safe" string."""
+  return string.replace('}', '}}').replace('{', '{{')
+
+
+def IsLoaded():
+  """Checks if a Windows Registry hive is loaded."""
+  current_hive = PregCache.hive_storage.loaded_hive
+  if not current_hive:
+    return False
+
+  current_key = current_hive.GetCurrentRegistryKey()
+  if hasattr(current_key, 'path'):
+    return True
+
+  if current_hive.name != 'N/A':
+    return True
+
+  print (
+      u'No hive loaded, cannot complete action. Use "hive list" '
+      u'and "hive open" to load a hive.')
+  return False
+
+
+def GetValue(value_name):
+  """Return a value object from the currently loaded Registry key.
+
+  Args:
+    value_name: A string containing the name of the value to be retrieved.
+
+  Returns:
+    The Registry value (instance of WinPyregfValue) if it exists, None if
+    either there is no currently loaded Registry key or if the value does
+    not exist.
+  """
+  current_hive = PregCache.hive_storage.loaded_hive
+  if not current_hive:
+    return
+
+  current_key = current_hive.GetCurrentRegistryKey()
+
+  if not current_key:
+    return
+
+  return current_key.GetValue(value_name)
+
+
+def GetValueData(value_name):
+  """Return the value data from a value in the currently loaded Registry key.
+
+  Args:
+    value_name: A string containing the name of the value to be retrieved.
+
+  Returns:
+    The data from a Registry value if it exists, None if either there is no
+    currently loaded Registry key or if the value does not exist.
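+
+  Example, mirroring the preg tests (assumes a loaded NTUSER.DAT hive with
+  '\Software\JavaSoft\Java Update\Policy' as the current key):
+    GetValueData(u'VersionXmlURL')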
+  """
+  value = GetValue(value_name)
+
+  if not value:
+    return
+
+  return value.data
+
+
+def GetCurrentKey():
+  """Return the currently loaded Registry key (instance of WinPyregfKey).
+
+  Returns:
+    The currently loaded Registry key (instance of WinPyregfKey) or None
+    if there is no loaded key.
+  """
+  current_hive = PregCache.hive_storage.loaded_hive
+  if not current_hive:
+    return
+
+  return current_hive.GetCurrentRegistryKey()
+
+
+def GetFormatString(event_object):
+  """Return a format string that can be used for a given event object."""
+  # Assign a default value to the alignment length.
+  align_length = 15
+
+  # Go through the attributes and see if there is an attribute
+  # name that is longer than the default alignment length, and adjust
+  # it accordingly if found.
+  if hasattr(event_object, 'regvalue'):
+    attributes = event_object.regvalue.keys()
+  else:
+    attributes = event_object.GetAttributes().difference(
+        event_object.COMPARE_EXCLUDE)
+
+  for attribute in attributes:
+    attribute_len = len(attribute)
+    if attribute_len > align_length and attribute_len < 30:
+      align_length = attribute_len
+
+  # Create the format string that will be used, using the variable
+  # alignment length (calculated in the prior step).
+  return u'{{0:>{0:d}s}} : {{1!s}}'.format(align_length)
+
+
+def GetEventHeader(event_object, descriptions, exclude_timestamp):
+  """Returns a list of strings that contains a header for the event.
+
+  Args:
+    event_object: An event object (instance of event.EventObject).
+    descriptions: A list of strings describing the value of the header
+                  timestamp.
+    exclude_timestamp: A boolean. If it is set to True the method
+                       will not include the timestamp in the header.
+
+  Returns:
+    A list of strings containing header information for the event.
+  """
+  format_string = GetFormatString(event_object)
+
+  # Create the strings to return.
+  ret_strings = []
+  ret_strings.append(u'Key information.')
+  if not exclude_timestamp:
+    for description in descriptions:
+      ret_strings.append(format_string.format(
+          description, timelib.Timestamp.CopyToIsoFormat(
+              event_object.timestamp)))
+  if hasattr(event_object, 'keyname'):
+    ret_strings.append(format_string.format(u'Key Path', event_object.keyname))
+  if event_object.timestamp_desc != eventdata.EventTimestamp.WRITTEN_TIME:
+    ret_strings.append(format_string.format(
+        u'Description', event_object.timestamp_desc))
+
+  ret_strings.append(frontend_utils.FormatHeader(u'Data', u'-'))
+
+  return ret_strings
+
+
+def GetEventBody(event_object, file_entry=None, show_hex=False):
+  """Returns a list of strings containing information from an event.
+
+  Args:
+    event_object: An event object (instance of event.EventObject).
+    file_entry: An optional file entry object (instance of dfvfs.FileEntry)
+                that the event originated from. Default is None.
+    show_hex: A boolean, if set to True a hex dump of the value is included
+              in the output. The default value is False.
+
+  Returns:
+    A list of strings containing the event body.
+  """
+  format_string = GetFormatString(event_object)
+
+  ret_strings = []
+
+  timestamp_description = getattr(
+      event_object, 'timestamp_desc', eventdata.EventTimestamp.WRITTEN_TIME)
+
+  if timestamp_description != eventdata.EventTimestamp.WRITTEN_TIME:
+    ret_strings.append(u'<{0:s}>'.format(timestamp_description))
+
+  if hasattr(event_object, 'regvalue'):
+    attributes = event_object.regvalue
+  else:
+    # TODO: Add a function for this to avoid repeating code.
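+    # Build a plain attribute dictionary from the event object, skipping
+    # bookkeeping attributes that do not describe Registry value data.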
+    keys = event_object.GetAttributes().difference(
+        event_object.COMPARE_EXCLUDE)
+    keys.discard('offset')
+    keys.discard('timestamp_desc')
+    attributes = {}
+    for key in keys:
+      attributes[key] = getattr(event_object, key)
+
+  for attribute, value in attributes.items():
+    ret_strings.append(format_string.format(attribute, value))
+
+  if show_hex and file_entry:
+    event_object.pathspec = file_entry.path_spec
+    ret_strings.append(frontend_utils.FormatHeader(
+        u'Hex Output From Event.', '-'))
+    ret_strings.append(
+        frontend_utils.OutputWriter.GetEventDataHexDump(event_object))
+
+  return ret_strings
+
+
+def GetRangeForAllLoadedHives():
+  """Return a range of index numbers for all loaded hives."""
+  return range(0, GetTotalNumberOfLoadedHives())
+
+
+def GetTotalNumberOfLoadedHives():
+  """Return the total number of Registry hives that are loaded."""
+  return len(PregCache.hive_storage)
+
+
+def ParseKey(key, shell_helper, hive_helper, verbose=False, use_plugins=None):
+  """Parse a single Registry key and return parsed information.
+
+  Parses the Registry key either using the supplied plugins or trying
+  all available plugins.
+
+  Args:
+    key: The Registry key to parse, WinRegKey object or a string.
+    shell_helper: A shell helper object (instance of PregHelper).
+    hive_helper: A hive object (instance of PregHiveHelper).
+    verbose: Print additional information, such as a hex dump.
+    use_plugins: A list of plugin names to use, or None if all should be used.
+
+  Returns:
+    A list of strings.
+  """
+  print_strings = []
+  if not hive_helper:
+    return
+
+  if isinstance(key, basestring):
+    key = hive_helper.GetKeyByPath(key)
+
+  if not key:
+    return
+
+  # Detect the Registry type.
+  registry_type = hive_helper.type
+
+  plugins = {}
+  plugins_list = parsers_manager.ParsersManager.GetWindowsRegistryPlugins()
+
+  # Compile a list of plugins we are about to use.
+  for weight in plugins_list.GetWeights():
+    plugin_list = plugins_list.GetWeightPlugins(weight, registry_type)
+    plugins[weight] = []
+    for plugin in plugin_list:
+      if use_plugins:
+        plugin_obj = plugin(reg_cache=hive_helper.reg_cache)
+        if plugin_obj.NAME in use_plugins:
+          plugins[weight].append(plugin_obj)
+      else:
+        plugins[weight].append(plugin(
+            reg_cache=hive_helper.reg_cache))
+
+  event_queue = single_process.SingleProcessQueue()
+  event_queue_consumer = PregEventObjectQueueConsumer(event_queue)
+
+  # Build a parser context.
+  parser_context = shell_helper.BuildParserContext(event_queue)
+
+  # Run all the plugins in the correct order of weight.
+  for weight in plugins:
+    for plugin in plugins[weight]:
+      plugin.Process(parser_context, key=key)
+      event_queue_consumer.ConsumeEventObjects()
+      if not event_queue_consumer.event_objects:
+        continue
+
+      print_strings.append(u'')
+      print_strings.append(
+          u'{0:^80}'.format(u' ** Plugin : {0:s} **'.format(
+              plugin.plugin_name)))
+      print_strings.append(u'')
+      print_strings.append(u'[{0:s}] {1:s}'.format(
+          plugin.REG_TYPE, plugin.DESCRIPTION))
+      print_strings.append(u'')
+      if plugin.URLS:
+        print_strings.append(u'Additional information can be found here:')
+
+        for url in plugin.URLS:
+          print_strings.append(u'{0:>17s} {1:s}'.format(u'URL :', url))
+        print_strings.append(u'')
+
+      # TODO: move into the event queue consumer.
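+      # Group the extracted event objects by timestamp so that events that
+      # share a timestamp can also share a single header below.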
+      event_objects_and_timestamps = {}
+      event_object = event_queue_consumer.event_objects.pop(0)
+      while event_object:
+        PregCache.events_from_last_parse.append(event_object)
+        event_objects_and_timestamps.setdefault(
+            event_object.timestamp, []).append(event_object)
+
+        if event_queue_consumer.event_objects:
+          event_object = event_queue_consumer.event_objects.pop(0)
+        else:
+          event_object = None
+
+      if not event_objects_and_timestamps:
+        continue
+
+      # If there is only a single timestamp then we'll include it in the
+      # header, otherwise each event will have its own timestamp.
+      if len(event_objects_and_timestamps) > 1:
+        exclude_timestamp_in_header = True
+      else:
+        exclude_timestamp_in_header = False
+
+      first = True
+      for event_timestamp in sorted(event_objects_and_timestamps):
+        if first:
+          first_event = event_objects_and_timestamps[event_timestamp][0]
+          descriptions = set()
+          for event_object in event_objects_and_timestamps[event_timestamp]:
+            descriptions.add(getattr(event_object, 'timestamp_desc', u''))
+          print_strings.extend(GetEventHeader(
+              first_event, list(descriptions), exclude_timestamp_in_header))
+          first = False
+
+        if exclude_timestamp_in_header:
+          print_strings.append(u'')
+          print_strings.append(u'[{0:s}]'.format(
+              timelib.Timestamp.CopyToIsoFormat(event_timestamp)))
+
+        for event_object in event_objects_and_timestamps[event_timestamp]:
+          print_strings.append(u'')
+          print_strings.extend(GetEventBody(
+              event_object, hive_helper.file_entry, verbose))
+
+      print_strings.append(u'')
+
+  # Print '*' 80 times.
+  print_strings.append(u'*' * 80)
+  print_strings.append(u'')
+
+  return print_strings
+
+
+# TODO: Move this to dfVFS and improve.
+def PathExists(file_path):
+  """Determine whether a given file path exists as a file, directory or device.
+
+  Args:
+    file_path: A string denoting the file path that needs checking.
+
+  Returns:
+    A tuple, a boolean indicating whether or not the path exists and a string
+    that contains the reason, if any, why this was not determined to be a file.
+  """
+  if os.path.exists(file_path):
+    return True, u''
+
+  try:
+    if pysmdev.check_device(file_path):
+      return True, u''
+  except IOError as exception:
+    return False, u'Unable to determine, with error: {0:s}'.format(exception)
+
+  return False, u'Not an existing file.'
+
+
+def RunModeConsole(front_end, options):
+  """Open up an iPython console.
+
+  Args:
+    front_end: the preg front-end object (instance of PregFrontend).
+    options: the command line arguments (instance of argparse.Namespace).
+  """
+  namespace = {}
+
+  function_name_length = 23
+  banners = []
+  banners.append(frontend_utils.FormatHeader(
+      u'Welcome to PREG - home of the Plaso Windows Registry Parsing.'))
+  banners.append(u'')
+  banners.append(u'Some of the commands that are available for use are:')
+  banners.append(u'')
+  banners.append(frontend_utils.FormatOutputString(
+      u'cd key', u'Navigate the Registry like a directory structure.',
+      function_name_length))
+  banners.append(frontend_utils.FormatOutputString(
+      u'ls [-v]', (
+          u'List all subkeys and values of a Registry key. 
If called as '
+          u'ls -v (or ls True) then the value data will be included in '
+          u'the output as well.'),
+      function_name_length))
+  banners.append(frontend_utils.FormatOutputString(
+      u'parse [-v]', u'Parse the current key using all plugins.',
+      function_name_length))
+  banners.append(frontend_utils.FormatOutputString(
+      u'pwd', u'Print the working "directory" or the path of the current key.',
+      function_name_length))
+  banners.append(frontend_utils.FormatOutputString(
+      u'plugin [-h] plugin_name', (
+          u'Run a particular key-based plugin on the loaded hive. The correct '
+          u'Registry key will be loaded, opened and then parsed.'),
+      function_name_length))
+  banners.append(frontend_utils.FormatOutputString(
+      u'get_value value_name', (
+          u'Get a value from the currently loaded Registry key.')))
+  banners.append(frontend_utils.FormatOutputString(
+      u'get_value_data value_name', (
+          u'Get the value data from a value stored in the currently loaded '
+          u'Registry key.')))
+  banners.append(frontend_utils.FormatOutputString(
+      u'get_key', u'Return the currently loaded Registry key.'))
+
+  banners.append(u'')
+
+  # Build the global cache and prepare the tool.
+  hive_storage = PregStorage()
+  shell_helper = PregHelper(options, front_end, hive_storage)
+  parser_context = shell_helper.BuildParserContext()
+
+  PregCache.parser_context = parser_context
+  PregCache.shell_helper = shell_helper
+  PregCache.hive_storage = hive_storage
+
+  registry_types = getattr(options, 'regfile', None)
+  if isinstance(registry_types, basestring):
+    registry_types = registry_types.split(u',')
+
+  if not registry_types:
+    registry_types = [
+        'NTUSER', 'USRCLASS', 'SOFTWARE', 'SYSTEM', 'SAM', 'SECURITY']
+  PregCache.shell_helper.Scan(registry_types)
+
+  if len(PregCache.hive_storage) == 1:
+    PregCache.hive_storage.SetOpenHive(0)
+    hive_helper = PregCache.hive_storage.loaded_hive
+    banners.append(
+        u'Opening hive: {0:s} [{1:s}]'.format(
+            hive_helper.path, hive_helper.collector_name))
+    ConsoleConfig.SetPrompt(hive_path=hive_helper.path)
+
+  loaded_hive = PregCache.hive_storage.loaded_hive
+
+  if loaded_hive and loaded_hive.name != u'N/A':
+    banners.append(
+        u'Registry hive: {0:s} is available and loaded.'.format(
+            loaded_hive.name))
+  else:
+    banners.append(u'More than one Registry file ready for use.')
+    banners.append(u'')
+    banners.append(PregCache.hive_storage.ListHives())
+    banners.append(u'')
+    banners.append((
+        u'Use "hive open INDEX" to load a hive and "hive list" to see a '
+        u'list of available hives.'))
+
+  banners.append(u'')
+  banners.append(u'Happy command line console fu-ing.')
+
+  # Add the helper functions and variables to the shell's namespace.
+  namespace.update(globals())
+  namespace.update({
+      'get_current_key': GetCurrentKey,
+      'get_key': GetCurrentKey,
+      'get_value': GetValue,
+      'get_value_data': GetValueData,
+      'number_of_hives': GetTotalNumberOfLoadedHives,
+      'range_of_hives': GetRangeForAllLoadedHives,
+      'options': options})
+
+  ipshell_config = ConsoleConfig.GetConfig()
+
+  if loaded_hive:
+    ConsoleConfig.SetPrompt(
+        hive_path=loaded_hive.name, config=ipshell_config)
+  else:
+    ConsoleConfig.SetPrompt(hive_path=u'NO HIVE LOADED', config=ipshell_config)
+
+  # Start the shell.
+  ipshell = InteractiveShellEmbed(
+      user_ns=namespace, config=ipshell_config, banner1=u'\n'.join(banners),
+      exit_msg='')
+  ipshell.confirm_exit = False
+  # Add the "magic" functions.
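+  # The MyMagics class defined above provides the %cd, %hive, %ls, %parse,
+  # %pwd, %plugin and %redirect_output magic functions inside the console.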
+  ipshell.register_magics(MyMagics)
+  # Set autocall to two, making parentheses unnecessary when calling
+  # function names (although they can be used and are necessary sometimes,
+  # like in variable assignments, etc).
+  ipshell.autocall = 2
+  # Register command completion for the magic commands.
+  ipshell.set_hook('complete_command', CdCompleter, str_key='%cd')
+  ipshell.set_hook('complete_command', VerboseCompleter, str_key='%ls')
+  ipshell.set_hook('complete_command', VerboseCompleter, str_key='%parse')
+  ipshell.set_hook('complete_command', PluginCompleter, str_key='%plugin')
+
+  ipshell()
+
+
+def Main():
+  """Run the tool."""
+  output_writer = frontend.StdoutFrontendOutputWriter()
+  front_end = PregFrontend(output_writer)
+
+  epilog = textwrap.dedent("""
+
+Example usage:
+
+Parse the SOFTWARE hive from an image:
+  {0:s} [--vss] [--vss-stores VSS_STORES] -i IMAGE_PATH [-o OFFSET] -c SOFTWARE
+
+Parse a userassist key within an extracted hive:
+  {0:s} -p userassist MYNTUSER.DAT
+
+Parse the run key from all Registry hives (in VSS too):
+  {0:s} --vss -i IMAGE_PATH [-o OFFSET] -p run
+
+Open up a console session for the SYSTEM hive inside an image:
+  {0:s} -i IMAGE_PATH [-o OFFSET] -c SYSTEM
+  """).format(os.path.basename(sys.argv[0]))
+
+  description = textwrap.dedent("""
+preg is a simple Windows Registry parser using the plaso Registry
+plugins and image parsing capabilities.
+
+It uses the back-end libraries of plaso to read raw image files and
+extract Registry files from VSS and restore points and then runs the
+Registry plugins of plaso against the Registry hive and presents the
+results in a textual format.
+
+  """)
+
+  arg_parser = argparse.ArgumentParser(
+      epilog=epilog, description=description, add_help=False,
+      formatter_class=argparse.RawDescriptionHelpFormatter)
+
+  # Create the different argument groups.
+  mode_options = arg_parser.add_argument_group(u'Run Mode Options')
+  image_options = arg_parser.add_argument_group(u'Image Options')
+  info_options = arg_parser.add_argument_group(u'Informational Options')
+  additional_data = arg_parser.add_argument_group(u'Additional Options')
+
+  mode_options.add_argument(
+      '-c', '--console', dest='console', action='store_true', default=False,
+      help=u'Drop into a console session instead of printing output to STDOUT.')
+
+  additional_data.add_argument(
+      '-r', '--restore_points', dest='restore_points', action='store_true',
+      default=False, help=u'Include restore points for hive locations.')
+
+  image_options.add_argument(
+      '-i', '--image', dest='image', action='store', type=unicode, default='',
+      metavar='IMAGE_PATH',
+      help=(u'If the Registry file is contained within a storage media image, '
+            u'set this option to specify the path of the image file.'))
+
+  front_end.AddImageOptions(image_options)
+
+  info_options.add_argument(
+      '-v', '--verbose', dest='verbose', action='store_true', default=False,
+      help=u'Print sub key information.')
+
+  info_options.add_argument(
+      '-h', '--help', action='help', help=u'Show this help message and exit.')
+
+  front_end.AddVssProcessingOptions(additional_data)
+
+  info_options.add_argument(
+      '--info', dest='info', action='store_true', default=False,
+      help=u'Print out information about supported plugins.')
+
+  mode_options.add_argument(
+      '-p', '--plugins', dest='plugin_names', action='append', default=[],
+      type=unicode, metavar='PLUGIN_NAME',
+      help=(
+          u'Substring match of the Registry plugin to be used, this '
+          u'parameter can be repeated to create a list of plugins to be '
+          u'run against, e.g. "-p userassist -p rdp" or "-p userassist".'))
+
+  mode_options.add_argument(
+      '-k', '--key', dest='key', action='store', default='', type=unicode,
+      metavar='REGISTRY_KEYPATH',
+      help=(u'A Registry key path that the tool should parse using all '
+            u'available plugins.'))
+
+  arg_parser.add_argument(
+      'regfile', action='store', metavar='REGHIVE', nargs='?',
+      help=(u'The Registry hive to read keys from (not needed if running '
+            u'with a plugin).'))
+
+  # Parse the command line arguments.
+  options = arg_parser.parse_args()
+
+  if options.info:
+    print front_end.GetListOfAllPlugins()
+    return True
+
+  try:
+    front_end.ParseOptions(options, source_option='image')
+  except errors.BadConfigOption as exception:
+    arg_parser.print_usage()
+    print u''
+    logging.error('{0:s}'.format(exception))
+    return False
+
+  # Run the tool, using the run mode according to the options passed
+  # to the tool.
+  if front_end.run_mode == front_end.RUN_MODE_CONSOLE:
+    RunModeConsole(front_end, options)
+  elif front_end.run_mode == front_end.RUN_MODE_REG_KEY:
+    front_end.RunModeRegistryKey(options, options.plugin_names)
+  elif front_end.run_mode == front_end.RUN_MODE_REG_PLUGIN:
+    front_end.RunModeRegistryPlugin(options, options.plugin_names)
+  elif front_end.run_mode == front_end.RUN_MODE_REG_FILE:
+    front_end.RunModeRegistryFile(options, options.regfile)
+
+  return True
+
+
+if __name__ == '__main__':
+  if not Main():
+    sys.exit(1)
+  else:
+    sys.exit(0)
diff --git a/plaso/frontend/preg_test.py b/plaso/frontend/preg_test.py
new file mode 100644
index 0000000..10397f5
--- /dev/null
+++ b/plaso/frontend/preg_test.py
@@ -0,0 +1,353 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the preg front-end.""" + +import StringIO +import unittest + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory + +from plaso.frontend import preg +from plaso.frontend import test_lib + +from plaso.lib import errors + + +class StringIOOutputWriter(object): + """Class that implements a StringIO output writer.""" + + def __init__(self): + """Initialize the string output writer.""" + super(StringIOOutputWriter, self).__init__() + self._string_obj = StringIO.StringIO() + + # Make the output writer compatible with a filehandle interface. + self.write = self.Write + + def flush(self): + """Flush the internal buffer.""" + self._string_obj.flush() + + def GetValue(self): + """Returns the write buffer from the output writer.""" + return self._string_obj.getvalue() + + def GetLine(self): + """Returns a single line read from the output buffer.""" + return self._string_obj.readline() + + def SeekToBeginning(self): + """Seeks the output buffer to the beginning of the buffer.""" + self._string_obj.seek(0) + + def Write(self, string): + """Writes a string to the StringIO object.""" + self._string_obj.write(string) + + +class PregFrontendTest(test_lib.FrontendTestCase): + """Tests for the preg front-end.""" + + def _GetHelperAndOutputWriter(self): + """Return a helper object (instance of PregHelper) and an output writer.""" + hive_storage = preg.PregStorage() + options = test_lib.Options() + + output_writer = StringIOOutputWriter() + test_front_end = preg.PregFrontend(output_writer) + + shell_helper = preg.PregHelper(options, test_front_end, hive_storage) + + return shell_helper, output_writer + + def testBadRun(self): + """Test few functions that should raise exceptions.""" + shell_helper, _ = self._GetHelperAndOutputWriter() + + options = test_lib.Options() + options.foo = u'bar' + + with self.assertRaises(errors.BadConfigOption): + shell_helper.tool_front_end.ParseOptions(options) + + options.regfile = 'this_path_does_not_exist' + with self.assertRaises(errors.BadConfigOption): + shell_helper.tool_front_end.ParseOptions(options) + + def testFrontEnd(self): + """Test various functions inside the front end object.""" + shell_helper, _ = self._GetHelperAndOutputWriter() + front_end = shell_helper.tool_front_end + + options = test_lib.Options() + hive_path = self._GetTestFilePath([u'NTUSER.DAT']) + options.regfile = hive_path + + front_end.ParseOptions(options, source_option='image') + + # Test the --info parameter to the tool. + info_string = front_end.GetListOfAllPlugins() + self.assertTrue(u'* Supported Plugins *' in info_string) + self.assertTrue( + u'userassist : Parser for User Assist Registry data' in info_string) + self.assertTrue( + u'services : Parser for services and drivers Registry ' in info_string) + + # Get paths to various registry files. 
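+    # The _GetRegistryFilePaths helper maps a plugin name to the locations
+    # its Registry hive can be found in, here the NTUSER.DAT locations for
+    # the userassist plugin.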
+ hive_paths_for_usersassist = set([ + u'/Documents And Settings/.+/NTUSER.DAT', '/Users/.+/NTUSER.DAT']) + # Testing functions within the front end, thus need to access protected + # members. + # pylint: disable=protected-access + test_paths_for_userassist = set( + front_end._GetRegistryFilePaths(u'userassist')) + + self.assertEquals(hive_paths_for_usersassist, test_paths_for_userassist) + + # Set the path to the system registry. + preg.PregCache.knowledge_base_object.pre_obj.sysregistry = u'C:/Windows/Foo' + + # Test the SOFTWARE hive. + test_paths = front_end._GetRegistryFilePaths(u'', u'SOFTWARE') + self.assertEqual(test_paths, [u'C:/Windows/Foo/SOFTWARE']) + + def testMagicClass(self): + """Test the magic class functions.""" + # Open up a hive. + hive_path = self._GetTestFilePath([u'NTUSER.DAT']) + shell_helper, _ = self._GetHelperAndOutputWriter() + + hive_helper = shell_helper.OpenHive(hive_path, None) + self.assertEqual(hive_helper.name, u'NTUSER.DAT') + + preg.PregCache.shell_helper = shell_helper + preg.PregCache.hive_storage = shell_helper.hive_storage + preg.PregCache.parser_context = shell_helper.BuildParserContext() + + # Mark this hive as the currently opened one. + preg.PregCache.hive_storage.AppendHive(hive_helper) + storage_length = len(preg.PregCache.hive_storage) + preg.PregCache.hive_storage.SetOpenHive(storage_length - 1) + + magic_obj = preg.MyMagics(None) + + # Change directory and verify it worked. + registry_key_path = u'\\Software\\JavaSoft\\Java Update\\Policy' + magic_obj.ChangeDirectory(registry_key_path) + registry_key = preg.GetCurrentKey() + self.assertEquals(registry_key.path, registry_key_path) + self.assertEquals( + hive_helper.GetCurrentRegistryKey().path, registry_key_path) + + # List the directory content. + output_string = StringIOOutputWriter() + magic_obj.RedirectOutput(output_string) + magic_obj.ListDirectoryContent(u'') + expected_strings = [ + u'-r-xr-xr-x [REG_SZ] LastUpdateBeginTime', + u'-r-xr-xr-x [REG_SZ] LastUpdateFinishTime', + u'-r-xr-xr-x [REG_SZ] VersionXmlURL\n'] + self.assertEquals(output_string.GetValue(), u'\n'.join(expected_strings)) + + # Parse the current key. + output_string = StringIOOutputWriter() + magic_obj.RedirectOutput(output_string) + magic_obj.ParseCurrentKey(u'') + partial_string = ( + u'LastUpdateFinishTime : [REG_SZ] Tue, 04 Aug 2009 15:18:35 GMT') + self.assertTrue(partial_string in output_string.GetValue()) + + # Parse using a plugin. + output_string = StringIOOutputWriter() + magic_obj.RedirectOutput(output_string) + magic_obj.ParseWithPlugin(u'userassist') + + partial_string = ( + u'UEME_RUNPIDL:%csidl2%\\BCWipe 3.0\\BCWipe Task Manager.lnk ' + u': [Count: 1]') + self.assertTrue(partial_string in output_string.GetValue()) + + # Let's see where we are at the moment. + output_string = StringIOOutputWriter() + magic_obj.RedirectOutput(output_string) + magic_obj.PrintCurrentWorkingDirectory(u'') + + current_directory = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{5E6AB780-7743-11CF-A12B-00AA004AE837}\n') + + self.assertEquals(current_directory, output_string.GetValue()) + + def testParseHive(self): + """Test the ParseHive function.""" + shell_helper, _ = self._GetHelperAndOutputWriter() + + # TODO: Replace this once _GetTestFileEntry is pushed in. 
+ system_hive_path = self._GetTestFilePath(['SYSTEM']) + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=system_hive_path) + collectors = [('current', None)] + + key_paths = [ + u'\\ControlSet001\\Enum\\USBSTOR', + u'\\ControlSet001\\Enum\\USB', + u'\\ControlSet001\\Control\\Windows'] + + output = shell_helper.tool_front_end.ParseHive( + path_spec, collectors, shell_helper, key_paths=key_paths, + use_plugins=None, verbose=False) + + self.assertTrue(u'ComponentizedBuild : [REG_DWORD_LE] 1' in output) + self.assertTrue(u'subkey_name : Disk&Ven_HP&Prod_v100w&Rev_1024' in output) + + def testRunPlugin(self): + """Tests running the preg frontend against a plugin.""" + shell_helper, output_writer = self._GetHelperAndOutputWriter() + + options = shell_helper.tool_options + options.regfile = self._GetTestFilePath(['NTUSER.DAT']) + options.verbose = False + + shell_helper.tool_front_end.ParseOptions(options, source_option='image') + shell_helper.tool_front_end.RunModeRegistryPlugin(options, u'userassist') + + self.assertTrue(( + u'UEME_RUNPATH:C:\\Program Files\\Internet Explorer\\iexplore.exe : ' + u'[Count: 1]') in output_writer.GetValue()) + + # TODO: Add tests that parse a disk image. Test both Registry key parsing + # and plugin parsing. + + def testRunAgainstKey(self): + """Tests running the preg frontend against a Registry key.""" + shell_helper, output_writer = self._GetHelperAndOutputWriter() + + options = shell_helper.tool_options + options.key = u'\\Microsoft\\Windows NT\\CurrentVersion' + options.regfile = self._GetTestFilePath(['SOFTWARE']) + options.verbose = False + + shell_helper.tool_front_end.ParseOptions(options, source_option='image') + shell_helper.tool_front_end.RunModeRegistryKey(options, u'') + + self.assertTrue( + u'Product name : Windows 7 Ultimate' in output_writer.GetValue()) + + def testRunAgainstFile(self): + """Tests running the preg frontend against a whole Registry file.""" + shell_helper, output_writer = self._GetHelperAndOutputWriter() + + options = shell_helper.tool_options + options.regfile = self._GetTestFilePath(['SOFTWARE']) + + shell_helper.tool_front_end.ParseOptions(options, source_option='image') + shell_helper.tool_front_end.RunModeRegistryFile(options, options.regfile) + + plugins = set() + registry_keys = set() + line_count = 0 + + output_writer.SeekToBeginning() + line = output_writer.GetLine() + while line: + line_count += 1 + line = line.lstrip() + if line.startswith('** Plugin'): + _, _, plugin_name = line.rpartition(':') + plugins.add(plugin_name.strip()) + if line.startswith('Key Path :'): + _, _, key_name = line.rpartition(':') + registry_keys.add(key_name.strip()) + line = output_writer.GetLine() + + # Define the minimum set of plugins that need to be in the output. + expected_plugins = set([ + u'winreg_run_software **', u'winreg_task_cache **', u'winreg_winver **', + u'winreg_msie_zone_software **', u'winreg_default **']) + + self.assertTrue(expected_plugins.issubset(plugins)) + + self.assertTrue(( + u'\\Microsoft\\Windows NT\\CurrentVersion\\Schedule\\' + u'TaskCache') in registry_keys) + self.assertTrue( + u'\\Microsoft\\Windows\\CurrentVersion\\RunOnce' in registry_keys) + + # The output should grow with each newly added plugin, and it might be + # reduced with changes to the codebase, yet there should be at least 1.500 + # lines in the output. 
+    self.assertGreater(line_count, 1500)
+
+  def testTopLevelMethods(self):
+    """Test a few of the top level methods in the preg module."""
+    shell_helper, _ = self._GetHelperAndOutputWriter()
+
+    # Set the cache.
+    preg.PregCache.shell_helper = shell_helper
+    preg.PregCache.hive_storage = shell_helper.hive_storage
+    preg.PregCache.parser_context = shell_helper.BuildParserContext()
+
+    # Open up a hive.
+    hive_path = self._GetTestFilePath([u'NTUSER.DAT'])
+    hive_helper = shell_helper.OpenHive(hive_path, None)
+    preg.PregCache.hive_storage.AppendHive(hive_helper)
+    preg.PregCache.hive_storage.SetOpenHive(
+        len(preg.PregCache.hive_storage) - 1)
+
+    self.assertTrue(preg.IsLoaded())
+    self.assertEqual(
+        preg.PregCache.hive_storage.loaded_hive.name, u'NTUSER.DAT')
+
+    # Open a Registry key using the magic class.
+    registry_key_path = u'\\Software\\JavaSoft\\Java Update\\Policy'
+    magic_obj = preg.MyMagics(None)
+    magic_obj.ChangeDirectory(registry_key_path)
+
+    registry_key = preg.GetCurrentKey()
+    hive_helper = preg.PregCache.hive_storage.loaded_hive
+    self.assertEquals(registry_key.path, registry_key_path)
+    self.assertEquals(
+        hive_helper.GetCurrentRegistryKey().path, registry_key_path)
+
+    # Get a value out of the currently loaded Registry key.
+    value = preg.GetValue(u'VersionXmlURL')
+    self.assertEquals(value.name, u'VersionXmlURL')
+
+    value_data = preg.GetValueData(u'VersionXmlURL')
+    self.assertEquals(
+        value_data,
+        u'http://javadl.sun.com/webapps/download/AutoDL?BundleId=33742')
+
+    # Parse a Registry key.
+    parsed_strings = preg.ParseKey(
+        registry_key, shell_helper=shell_helper, hive_helper=hive_helper)
+    self.assertTrue(parsed_strings[1].lstrip().startswith(u'** Plugin : '))
+
+    # Change back to the root key.
+    magic_obj.ChangeDirectory(u'')
+    registry_key = preg.GetCurrentKey()
+    self.assertEquals(registry_key.path, u'\\')
+
+  # TODO: Add tests for formatting of events, e.g. parse a key, get the event
+  # objects and test the formatting of said event object.
+  # TODO: Add tests for running in console mode.
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/frontend/presets.py b/plaso/frontend/presets.py
new file mode 100644
index 0000000..f76cb82
--- /dev/null
+++ b/plaso/frontend/presets.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper file for filtering out parsers.""" + +categories = { + 'win_gen': [ + 'bencode', 'esedb', 'filestat', 'google_drive', 'java_idx', 'lnk', + 'mcafee_protection', 'olecf', 'openxml', 'prefetch', + 'skydrive_log_error', 'skydrive_log', 'skype', + 'symantec_scanlog', 'webhist', 'winfirewall', 'winjob', + 'winreg'], + 'winxp': [ + 'recycle_bin_info2', 'win_gen', 'winevt'], + 'winxp_slow': [ + 'hachoir', 'winxp'], + 'win7': [ + 'recycle_bin', 'custom_destinations', 'olecf_automatic_destinations', + 'win_gen', 'winevtx'], + 'win7_slow': [ + 'hachoir', 'win7'], + 'webhist': [ + 'chrome_cache', 'chrome_cookies', 'chrome_extension_activity', + 'chrome_history', 'firefox_cache', 'firefox_cookies', + 'firefox_downloads', 'firefox_history', 'java_idx', 'msie_webcache', + 'msiecf', 'opera_global', 'opera_typed_history', 'safari_history'], + 'linux': [ + 'bencode', 'filestat', 'google_drive', 'java_idx', 'olecf', 'openxml', + 'pls_recall', 'popularity_contest', 'selinux', 'skype', 'syslog', + 'utmp', 'webhist', 'xchatlog', 'xchatscrollback', 'zeitgeist'], + 'macosx': [ + 'appusage', 'asl_log', 'bencode', 'bsm_log', 'cups_ipp', 'filestat', + 'google_drive', 'java_idx', 'ls_quarantine', 'mac_appfirewall_log', + 'mac_document_versions', 'mac_keychain', 'mac_securityd', + 'mackeeper_cache', 'macwifi', 'olecf', 'openxml', 'plist', 'skype', + 'utmpx', 'webhist'], + # TODO: Once syslog parser has been rewritten to be faster than the current + # one it's moved out of the default parsers for Mac OS X and into the "slow" + # mode. + 'macosx_slow': ['macosx', 'syslog'], + 'android': [ + 'android_app_usage', 'android_calls', 'android_sms'], +} + + +def GetParsersFromCategory(category): + """Return a list of parsers from a parser category.""" + return_list = [] + if category not in categories: + return return_list + + for item in categories.get(category): + if item in categories: + return_list.extend(GetParsersFromCategory(item)) + else: + return_list.append(item) + + return return_list diff --git a/plaso/frontend/pshell.py b/plaso/frontend/pshell.py new file mode 100755 index 0000000..4e33403 --- /dev/null +++ b/plaso/frontend/pshell.py @@ -0,0 +1,498 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a console, the CLI friendly front-end to plaso.""" + +import argparse +import logging +import os +import random +import sys +import tempfile + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +try: + # Support version 1.X of IPython. + # pylint: disable=no-name-in-module + from IPython.terminal.embed import InteractiveShellEmbed +except ImportError: + # Support version older than 1.X of IPython. 
+  # pylint: disable=no-name-in-module
+  from IPython.frontend.terminal.embed import InteractiveShellEmbed
+
+from IPython.config.loader import Config
+
+# pylint: disable=unused-import
+from plaso import analysis
+from plaso import filters
+from plaso import formatters
+from plaso import output
+from plaso import parsers
+from plaso import preprocessors
+
+from plaso.classifier import scanner
+
+from plaso.engine import collector
+from plaso.engine import engine
+from plaso.engine import queue
+from plaso.engine import single_process
+from plaso.engine import utils as engine_utils
+
+from plaso.frontend import frontend
+from plaso.frontend import utils as frontend_utils
+
+from plaso.lib import binary
+from plaso.lib import bufferlib
+from plaso.lib import errors
+from plaso.lib import event
+from plaso.lib import eventdata
+from plaso.lib import filter_interface
+from plaso.lib import lexer
+from plaso.lib import objectfilter
+from plaso.lib import output as output_lib
+from plaso.lib import pfilter
+from plaso.lib import proxy
+from plaso.lib import putils
+from plaso.lib import registry as class_registry
+from plaso.lib import storage
+from plaso.lib import timelib
+from plaso.lib import utils
+
+from plaso.multi_processing import foreman
+from plaso.multi_processing import rpc_proxy
+from plaso.multi_processing import process_info
+
+from plaso.output import helper as output_helper
+
+from plaso.parsers import manager as parsers_manager
+from plaso.parsers import plugins
+from plaso.parsers import text_parser
+from plaso.proto import plaso_storage_pb2
+
+from plaso.serializer import interface as serializer_interface
+from plaso.serializer import json_serializer
+from plaso.serializer import protobuf_serializer
+
+from plaso.unix import bsmtoken
+
+from plaso.winnt import environ_expand
+from plaso.winnt import known_folder_ids
+
+from plaso.winreg import cache as win_registry_cache
+from plaso.winreg import interface as win_registry_interface
+from plaso.winreg import path_expander
+from plaso.winreg import utils as win_registry_utils
+from plaso.winreg import winpyregf
+from plaso.winreg import winregistry
+
+
+class PshellFrontend(frontend.ExtractionFrontend):
+  """Class that implements the pshell front-end."""
+
+  _BYTES_IN_A_MIB = 1024 * 1024
+
+  def __init__(self):
+    """Initializes the front-end object."""
+    input_reader = frontend.StdinFrontendInputReader()
+    output_writer = frontend.StdoutFrontendOutputWriter()
+
+    super(PshellFrontend, self).__init__(input_reader, output_writer)
+
+
+def FindAllOutputs():
+  """Return a list of all available output modules."""
+  return putils.FindAllOutputs()
+
+
+def GetEventData(event_proto, before=0):
+  """Return a hexdump of the event data."""
+  return frontend_utils.OutputWriter.GetEventDataHexDump(event_proto, before)
+
+
+def GetFileEntryFromEventObject(event_object):
+  """Return a file entry object from an event object's path specification.
+
+  Args:
+    event_object: An event object (an instance of EventObject).
+
+  Returns:
+    A file entry object (instance of vfs.file_entry.FileEntry) or
+    None if the event object doesn't have a defined path spec.
+  """
+  path_spec = getattr(event_object, 'pathspec', None)
+
+  if not path_spec:
+    return
+
+  return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+
+def GetParserNames(parser_filter_string=None):
+  """Retrieves the parser names.
+
+  Args:
+    parser_filter_string: Optional parser filter string. The default is None.
+
+  Returns:
+    A list of parser names.
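+
+  Example (an illustrative filter string; see the parsers manager for the
+  supported syntax):
+    GetParserNames(parser_filter_string=u'winreg')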
+  """
+  return parsers_manager.ParsersManager.GetParserNames(
+      parser_filter_string=parser_filter_string)
+
+
+def GetParserObjects(parser_filter_string=None):
+  """Retrieves the parser objects.
+
+  Args:
+    parser_filter_string: Optional parser filter string. The default is None.
+
+  Returns:
+    A list of parser objects (instances of BaseParser).
+  """
+  return parsers_manager.ParsersManager.GetParserObjects(
+      parser_filter_string=parser_filter_string)
+
+
+def OpenOSFile(path):
+  """Opens a file entry from the OS."""
+  if not os.path.isfile(path):
+    logging.error(u'File: {0:s} does not exist.'.format(path))
+    return
+
+  path_spec = path_spec_factory.Factory.NewPathSpec(
+      definitions.TYPE_INDICATOR_OS, location=path)
+  return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+
+def OpenStorageFile(storage_path):
+  """Opens a storage file and returns the storage file object."""
+  if not os.path.isfile(storage_path):
+    return
+
+  try:
+    store = storage.StorageFile(storage_path, read_only=True)
+  except IOError:
+    print 'Unable to load storage file, not a storage file?'
+    return
+
+  return store
+
+
+def OpenTskFile(image_path, image_offset, path=None, inode=None):
+  """Opens a file entry of a file inside an image file."""
+  path_spec = path_spec_factory.Factory.NewPathSpec(
+      definitions.TYPE_INDICATOR_OS, location=image_path)
+
+  if image_offset > 0:
+    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_TSK_PARTITION, start_offset=image_offset,
+        parent=path_spec)
+  else:
+    volume_path_spec = path_spec
+
+  if inode is not None:
+    if path is None:
+      path = u''
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_TSK, inode=inode, location=path,
+        parent=volume_path_spec)
+  else:
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_TSK, location=path, parent=volume_path_spec)
+
+  return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+
+def OpenVssFile(path, image_path, store_number, image_offset):
+  """Opens a file entry inside a VSS inside an image file."""
+  path_spec = path_spec_factory.Factory.NewPathSpec(
+      definitions.TYPE_INDICATOR_OS, location=image_path)
+
+  if image_offset > 0:
+    volume_path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_TSK_PARTITION, start_offset=image_offset,
+        parent=path_spec)
+  else:
+    volume_path_spec = path_spec
+
+  store_number -= 1
+
+  path_spec = path_spec_factory.Factory.NewPathSpec(
+      definitions.TYPE_INDICATOR_VSHADOW, store_index=store_number,
+      parent=volume_path_spec)
+  path_spec = path_spec_factory.Factory.NewPathSpec(
+      definitions.TYPE_INDICATOR_TSK, location=path, parent=path_spec)
+
+  return path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+
+def ParseFile(file_entry):
+  """Parse a file given a file entry or path and return a list of results.
+
+  Args:
+    file_entry: Either a file entry object (instance of dfvfs.FileEntry)
+                or a string containing a path (absolute or relative) to a
+                local file.
+
+  Returns:
+    A list of event objects (instances of EventObject) that were extracted
+    from the file (or an empty list if no events were extracted).
+  """
+  if not file_entry:
+    return
+
+  if isinstance(file_entry, basestring):
+    file_entry = OpenOSFile(file_entry)
+
+  # Set up the engine.
+  # TODO: refactor and add queue limit.
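+  # The single process engine is wired together from three in-memory
+  # queues: one feeding the extraction worker, one receiving the extracted
+  # event objects and one receiving parse errors.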
+  collection_queue = single_process.SingleProcessQueue()
+  storage_queue = single_process.SingleProcessQueue()
+  parse_error_queue = single_process.SingleProcessQueue()
+  engine_object = engine.BaseEngine(
+      collection_queue, storage_queue, parse_error_queue)
+
+  # Create a worker.
+  worker_object = engine_object.CreateExtractionWorker(0)
+  # TODO: add support for parser_filter_string.
+  worker_object.InitalizeParserObjects()
+  worker_object.ParseFileEntry(file_entry)
+
+  collection_queue.SignalEndOfInput()
+  engine_object.SignalEndOfInputStorageQueue()
+
+  results = []
+  while True:
+    try:
+      item = storage_queue.PopItem()
+    except errors.QueueEmpty:
+      break
+
+    if isinstance(item, queue.QueueEndOfInput):
+      break
+
+    results.append(item)
+  return results
+
+
+def Pfile2File(file_object, path):
+  """Saves a file-like object to the path."""
+  return frontend_utils.OutputWriter.WriteFile(file_object, path)
+
+
+def PrintTimestamp(timestamp):
+  """Returns a human readable timestamp from a timestamp value."""
+  return frontend_utils.OutputWriter.GetDateTimeString(timestamp)
+
+
+def PrintTimestampFromEvent(event_object):
+  """Returns a human readable timestamp from an event object."""
+  return PrintTimestamp(getattr(event_object, 'timestamp', 0))
+
+
+def Main():
+  """Start the tool."""
+  temp_location = tempfile.gettempdir()
+
+  options = putils.Options()
+
+  # Set the default options.
+  options.buffer_size = 0
+  options.debug = False
+  options.filename = '.'
+  options.file_filter = ''
+  options.filter = ''
+  options.image = False
+  options.image_offset = None
+  options.image_offset_bytes = None
+  options.old_preprocess = False
+  options.open_files = False
+  options.output = os.path.join(temp_location, 'wheredidmytimelinego.dump')
+  options.output_module = ''
+  options.parsers = ''
+  options.parse_vss = False
+  options.preprocess = False
+  options.recursive = False
+  options.single_process = False
+  options.timezone = 'UTC'
+  options.workers = 5
+
+  format_str = '[%(levelname)s] (%(processName)-10s) %(message)s'
+  logging.basicConfig(format=format_str)
+
+  front_end = PshellFrontend()
+
+  try:
+    front_end.ParseOptions(options, source_option='filename')
+    front_end.SetStorageFile(options.output)
+  except errors.BadConfigOption as exception:
+    logging.error(u'{0:s}'.format(exception))
+
+  # TODO: move to frontend object.
+  if options.image and options.image_offset_bytes is None:
+    if options.image_offset is not None:
+      bytes_per_sector = getattr(options, 'bytes_per_sector', 512)
+      options.image_offset_bytes = options.image_offset * bytes_per_sector
+    else:
+      options.image_offset_bytes = 0
+
+  namespace = {}
+
+  pre_obj = event.PreprocessObject()
+
+  namespace.update(globals())
+  namespace.update({
+      'frontend': front_end,
+      'pre_obj': pre_obj,
+      'options': options,
+      'find_all_output': FindAllOutputs,
+      'parse_file': ParseFile,
+      'timestamp_from_event': PrintTimestampFromEvent,
+      'message': formatters.manager.EventFormatterManager.GetMessageStrings})
+
+  # Include a few random phrases that get thrown in once the user exits
+  # the shell.
+  _my_random_phrases = [
+      u'I haven\'t seen timelines like this since yesterday.',
+      u'Timelining is super relaxing.',
+      u'Why did I not use the shell before?',
+      u'I like a do da cha cha',
+      u'I AM the Shogun of Harlem!',
+      (u'It doesn\'t matter if you win or lose, it\'s what you do with your '
+       u'dancin\' shoes'),
+      u'I have not had a night like that since the seventies.',
+      u'Baker Team. 
They\'re all dead, sir.',
+      (u'I could have killed \'em all, I could\'ve killed you. In town '
+       u'you\'re the law, out here it\'s me.'),
+      (u'Are you telling me that 200 of our men against your boy is a no-win '
+       u'situation for us?'),
+      u'Hunting? We ain\'t huntin\' him, he\'s huntin\' us!',
+      u'You picked the wrong man to push',
+      u'Live for nothing or die for something',
+      u'I am the Fred Astaire of karate.',
+      (u'God gave me a great body and it\'s my duty to take care of my '
+       u'physical temple.'),
+      u'This maniac should be wearing a number, not a badge',
+      u'Imagination is more important than knowledge.',
+      u'Do you hate being dead?',
+      u'You\'ve got 5 seconds... and 3 are up.',
+      u'He is in a gunfight right now. I\'m gonna have to take a message',
+      u'That would be better than losing your teeth',
+      u'The less you know, the more you make',
+      (u'A SQL query goes into a bar, walks up to two tables and asks, '
+       u'"Can I join you?"'),
+      u'This is your captor speaking.',
+      (u'If I find out you\'re lying, I\'ll come back and kill you in your '
+       u'own kitchen.'),
+      (u'He\'s the kind of guy who would drink a gallon of gasoline so '
+       u'that he can p*ss into your campfire.'),
+      u'I\'m gonna take you to the bank, Senator Trent. To the blood bank!',
+      u'I missed! I never miss! They must have been smaller than I thought',
+      u'Nah. I\'m just a cook.',
+      u'Next thing I know, you\'ll be dating musicians.',
+      u'Another cold day in hell',
+      u'Yeah, but I bet you she doesn\'t see these boys in the choir.',
+      u'You guys think you\'re above the law... well you ain\'t above mine!',
+      (u'One thought he was invincible... the other thought he could fly... '
+       u'They were both wrong'),
+      u'To understand what recursion is, you must first understand recursion']
+
+  arg_description = (
+      u'pshell is the interactive session tool that can be used to'
+      u'MISSING')
+
+  arg_parser = argparse.ArgumentParser(description=arg_description)
+
+  arg_parser.add_argument(
+      '-s', '--storage_file', '--storage-file', dest='storage_file',
+      type=unicode, default=u'', help=u'Path to a plaso storage file.',
+      action='store', metavar='PATH')
+
+  configuration = arg_parser.parse_args()
+
+  if configuration.storage_file:
+    store = OpenStorageFile(configuration.storage_file)
+    if store:
+      namespace.update({'store': store})
+
+  functions = [
+      FindAllOutputs, GetEventData, GetParserNames, GetParserObjects,
+      OpenOSFile, OpenStorageFile, OpenTskFile, OpenVssFile,
+      ParseFile, Pfile2File,
+      PrintTimestamp, PrintTimestampFromEvent]
+
+  functions_strings = []
+  for function in functions:
+    docstring, _, _ = function.__doc__.partition(u'\n')
+    docstring = u'\t{0:s} - {1:s}'.format(function.__name__, docstring)
+    functions_strings.append(docstring)
+  functions_strings = u'\n'.join(functions_strings)
+
+  banner = (
+      u'--------------------------------------------------------------\n'
+      u' Welcome to Plaso console - home of the Plaso adventure land.\n'
+      u'--------------------------------------------------------------\n'
+      u'This is the place where everything is allowed, as long as it is '
+      u'written in Python.\n\n'
+      u'Objects available:\n\toptions - set of options to the frontend.\n'
+      u'\tfrontend - A copy of the pshell frontend.\n'
+      u'\n'
+      u'All libraries have been imported and can be used, see help(frontend) '
+      u'or help(parser).\n'
+      u'\n'
+      u'Base methods:\n'
+      u'{0:s}'
+      u'\n\tmessage - Print message strings from an event object.'
+      u'\n'
+      u'\n'
+      u'p.s. 
typing in "pdb" and pressing enter puts the shell in debug' + u'mode which causes all exceptions being sent to pdb.\n' + u'Happy command line console fu-ing.\n\n').format(functions_strings) + + exit_message = u'You are now leaving the winter wonderland.\n\n{}'.format( + random.choice(_my_random_phrases)) + + shell_config = Config() + # Make slight adjustments to the iPython prompt. + shell_config.PromptManager.out_template = ( + r'{color.Normal}[{color.Red}\#{color.Normal}]<<< ') + shell_config.PromptManager.in_template = ( + r'[{color.LightBlue}\T{color.Normal}] {color.LightPurple}\Y2\n' + r'{color.Normal}[{color.Red}\#{color.Normal}] \$ ') + shell_config.PromptManager.in2_template = r'.\D.>>>' + + ipshell = InteractiveShellEmbed( + user_ns=namespace, config=shell_config, banner1=banner, + exit_msg=exit_message) + ipshell.confirm_exit = False + # Set autocall to two, making parenthesis not necessary when calling + # function names (although they can be used and are necessary sometimes, + # like in variable assignments, etc). + ipshell.autocall = 2 + ipshell() + + return True + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/plaso/frontend/psort.py b/plaso/frontend/psort.py new file mode 100755 index 0000000..c534f44 --- /dev/null +++ b/plaso/frontend/psort.py @@ -0,0 +1,764 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Psort (Plaso Síar Og Raðar Þessu) - Makes output from Plaso Storage files. 
+ +Sample Usage: + psort.py /tmp/mystorage.dump "date > '01-06-2012'" + +See additional details here: http://plaso.kiddaland.net/usage/psort +""" + +import argparse +import collections +import datetime +import time +import multiprocessing +import logging +import pdb +import sys + +import plaso +from plaso import analysis +from plaso import filters +from plaso import formatters # pylint: disable=unused-import +from plaso import output # pylint: disable=unused-import + +from plaso.analysis import context as analysis_context +from plaso.analysis import interface as analysis_interface +from plaso.artifacts import knowledge_base +from plaso.engine import queue +from plaso.frontend import frontend +from plaso.frontend import utils as frontend_utils +from plaso.lib import bufferlib +from plaso.lib import errors +from plaso.lib import output as output_lib +from plaso.lib import pfilter +from plaso.lib import timelib +from plaso.multi_processing import multi_process +from plaso.proto import plaso_storage_pb2 +from plaso.serializer import protobuf_serializer + +import pytz + + +class PsortFrontend(frontend.AnalysisFrontend): + """Class that implements the psort front-end.""" + + def __init__(self): + """Initializes the front-end object.""" + input_reader = frontend.StdinFrontendInputReader() + output_writer = frontend.StdoutFrontendOutputWriter() + + super(PsortFrontend, self).__init__(input_reader, output_writer) + + self._analysis_processes = [] + self._filter_buffer = None + self._filter_expression = None + self._filter_object = None + self._output_module_class = None + self._output_stream = None + self._slice_size = 5 + + def AddAnalysisPluginOptions(self, argument_group, plugin_names): + """Adds the analysis plugin options to the argument group + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + plugin_names: a string containing comma separated analysis plugin names. + + Raises: + BadConfigOption: if non-existing analysis plugin names are specified. + """ + if plugin_names == 'list': + return + + plugin_list = set([ + name.strip().lower() for name in plugin_names.split(',')]) + + # Get a list of all available plugins. + analysis_plugins = set([ + name.lower() for name, _, _ in analysis.ListAllPluginNames()]) + + # Get a list of the selected plugins (ignoring selections that did not + # have an actual plugin behind it). + plugins_to_load = analysis_plugins.intersection(plugin_list) + + # Check to see if we are trying to load plugins that do not exist. + difference = plugin_list.difference(analysis_plugins) + if difference: + raise errors.BadConfigOption( + u'Non-existing analysis plugins specified: {0:s}'.format( + u' '.join(difference))) + + plugins = analysis.LoadPlugins(plugins_to_load, None) + for plugin in plugins: + if plugin.ARGUMENTS: + for parameter, config in plugin.ARGUMENTS: + argument_group.add_argument(parameter, **config) + + def AddOutputModuleOptions(self, argument_group, module_names): + """Adds the output module options to the argument group + + Args: + argument_group: The argparse argument group (instance of + argparse._ArgumentGroup). + module_names: a string containing comma separated output module names. 
+ """ + if module_names == 'list': + return + + modules_list = set([name.lower() for name in module_names]) + + for output_module_string, _ in output_lib.ListOutputFormatters(): + if not output_module_string.lower() in modules_list: + continue + + output_module = output_lib.GetOutputFormatter(output_module_string) + if output_module.ARGUMENTS: + for parameter, config in output_module.ARGUMENTS: + argument_group.add_argument(parameter, **config) + + def ListAnalysisPlugins(self): + """Lists the analysis modules.""" + self.PrintHeader('Analysis Modules') + format_length = 10 + for name, _, _ in analysis.ListAllPluginNames(): + if len(name) > format_length: + format_length = len(name) + + for name, description, plugin_type in analysis.ListAllPluginNames(): + if plugin_type == analysis_interface.AnalysisPlugin.TYPE_ANNOTATION: + type_string = 'Annotation/tagging plugin' + elif plugin_type == analysis_interface.AnalysisPlugin.TYPE_ANOMALY: + type_string = 'Anomaly plugin' + elif plugin_type == analysis_interface.AnalysisPlugin.TYPE_REPORT: + type_string = 'Summary/Report plugin' + elif plugin_type == analysis_interface.AnalysisPlugin.TYPE_STATISTICS: + type_string = 'Statistics plugin' + else: + type_string = 'Unknown type' + + description = u'{0:s} [{1:s}]'.format(description, type_string) + self.PrintColumnValue(name, description, format_length) + self.PrintSeparatorLine() + + def ListOutputModules(self): + """Lists the output modules.""" + self.PrintHeader('Output Modules') + for name, description in output_lib.ListOutputFormatters(): + self.PrintColumnValue(name, description, 10) + self.PrintSeparatorLine() + + def ListTimeZones(self): + """Lists the timezones.""" + self.PrintHeader('Zones') + max_length = 0 + for zone in pytz.all_timezones: + if len(zone) > max_length: + max_length = len(zone) + + self.PrintColumnValue('Timezone', 'UTC Offset', max_length) + for zone in pytz.all_timezones: + zone_obj = pytz.timezone(zone) + date_str = unicode(zone_obj.localize(datetime.datetime.utcnow())) + if '+' in date_str: + _, _, diff = date_str.rpartition('+') + diff_string = u'+{0:s}'.format(diff) + else: + _, _, diff = date_str.rpartition('-') + diff_string = u'-{0:s}'.format(diff) + self.PrintColumnValue(zone, diff_string, max_length) + self.PrintSeparatorLine() + + def ParseOptions(self, options): + """Parses the options and initializes the front-end. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Raises: + BadConfigOption: if the options are invalid. + """ + super(PsortFrontend, self).ParseOptions(options) + + output_format = getattr(options, 'output_format', None) + if not output_format: + raise errors.BadConfigOption(u'Missing output format.') + + self._output_module_class = output_lib.GetOutputFormatter(output_format) + if not self._output_module_class: + raise errors.BadConfigOption( + u'Invalid output format: {0:s}.'.format(output_format)) + + self._output_stream = getattr(options, 'write', None) + if not self._output_stream: + self._output_stream = sys.stdout + + self._filter_expression = getattr(options, 'filter', None) + if self._filter_expression: + self._filter_object = filters.GetFilter(self._filter_expression) + if not self._filter_object: + raise errors.BadConfigOption( + u'Invalid filter expression: {0:s}'.format(self._filter_expression)) + + # Check to see if we need to create a circular buffer. 
+ if getattr(options, 'slicer', None): + self._slice_size = getattr(options, 'slice_size', 5) + self._filter_buffer = bufferlib.CircularBuffer(self._slice_size) + + def ParseStorage(self, options): + """Open a storage file and parse through it. + + Args: + options: the command line arguments (instance of argparse.Namespace). + + Returns: + A counter. + + Raises: + RuntimeError: if a non-recoverable situation is encountered. + """ + counter = None + + if options.slice: + if options.timezone == 'UTC': + zone = pytz.utc + else: + zone = pytz.timezone(options.timezone) + + timestamp = timelib.Timestamp.FromTimeString(options.slice, timezone=zone) + + # Convert number of minutes to microseconds. + range_operator = self._slice_size * 60 * 1000000 + + # Set the time range. + pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator) + pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator) + + if options.analysis_plugins: + read_only = False + else: + read_only = True + + try: + storage_file = self.OpenStorageFile(read_only=read_only) + except IOError as exception: + raise RuntimeError( + u'Unable to open storage file: {0:s} with error: {1:s}.'.format( + self._storage_file_path, exception)) + + with storage_file: + storage_file.SetStoreLimit(self._filter_object) + + try: + output_module = self._output_module_class( + storage_file, self._output_stream, options, self._filter_object) + except IOError as exception: + raise RuntimeError( + u'Unable to create output module with error: {0:s}'.format( + exception)) + + if not output_module: + raise RuntimeError(u'Missing output module.') + + if options.analysis_plugins: + logging.info(u'Starting analysis plugins.') + # Within all preprocessing objects, try to get the last one that has + # time zone information stored in it, the highest chance of it + # containing the information we are seeking (defaulting to the last + # one). + pre_objs = storage_file.GetStorageInformation() + pre_obj = pre_objs[-1] + for obj in pre_objs: + if getattr(obj, 'time_zone_str', ''): + pre_obj = obj + + # Fill in the collection information. + pre_obj.collection_information = {} + encoding = getattr(pre_obj, 'preferred_encoding', None) + if encoding: + cmd_line = ' '.join(sys.argv) + try: + pre_obj.collection_information['cmd_line'] = cmd_line.decode( + encoding) + except UnicodeDecodeError: + pass + pre_obj.collection_information['file_processed'] = ( + self._storage_file_path) + pre_obj.collection_information['method'] = 'Running Analysis Plugins' + pre_obj.collection_information['plugins'] = options.analysis_plugins + time_of_run = timelib.Timestamp.GetNow() + pre_obj.collection_information['time_of_run'] = time_of_run + + pre_obj.counter = collections.Counter() + + # Assign the preprocessing object to the storage. + # This is normally done in the construction of the storage object, + # however we cannot do that here since the preprocessing object is + # stored inside the storage file, so we need to open it first to + # be able to read it in, before we make changes to it. Thus we need + # to access this protected member of the class. + # pylint: disable=protected-access + storage_file._pre_obj = pre_obj + + # Start queues and load up plugins. + # TODO: add upper queue limit. 
+ analysis_output_queue = multi_process.MultiProcessingQueue() + event_queue_producers = [] + event_queues = [] + analysis_plugins_list = [ + x.strip() for x in options.analysis_plugins.split(',')] + + for _ in xrange(0, len(analysis_plugins_list)): + # TODO: add upper queue limit. + analysis_plugin_queue = multi_process.MultiProcessingQueue() + event_queues.append(analysis_plugin_queue) + event_queue_producers.append( + queue.ItemQueueProducer(event_queues[-1])) + + knowledge_base_object = knowledge_base.KnowledgeBase() + + analysis_plugins = analysis.LoadPlugins( + analysis_plugins_list, event_queues, options) + + # Now we need to start all the plugins. + for analysis_plugin in analysis_plugins: + analysis_report_queue_producer = queue.ItemQueueProducer( + analysis_output_queue) + analysis_context_object = analysis_context.AnalysisContext( + analysis_report_queue_producer, knowledge_base_object) + analysis_process = multiprocessing.Process( + name='Analysis {0:s}'.format(analysis_plugin.plugin_name), + target=analysis_plugin.RunPlugin, args=(analysis_context_object,)) + self._analysis_processes.append(analysis_process) + + analysis_process.start() + logging.info( + u'Plugin: [{0:s}] started.'.format(analysis_plugin.plugin_name)) + else: + event_queue_producers = [] + + output_buffer = output_lib.EventBuffer(output_module, options.dedup) + with output_buffer: + counter = ProcessOutput( + output_buffer, output_module, self._filter_object, + self._filter_buffer, event_queue_producers) + + for information in storage_file.GetStorageInformation(): + if hasattr(information, 'counter'): + counter['Stored Events'] += information.counter['total'] + + if not options.quiet: + logging.info(u'Output processing is done.') + + # Get all reports and tags from analysis plugins. + if options.analysis_plugins: + logging.info(u'Processing data from analysis plugins.') + for event_queue_producer in event_queue_producers: + event_queue_producer.SignalEndOfInput() + + # Wait for all analysis plugins to complete. + for number, analysis_process in enumerate(self._analysis_processes): + logging.debug( + u'Waiting for analysis plugin: {0:d} to complete.'.format(number)) + if analysis_process.is_alive(): + analysis_process.join(10) + else: + logging.warning(u'Plugin {0:d} already stopped.'.format(number)) + analysis_process.terminate() + logging.debug(u'All analysis plugins are now stopped.') + + # Close the output queue. + analysis_output_queue.SignalEndOfInput() + + # Go over each output. + analysis_queue_consumer = PsortAnalysisReportQueueConsumer( + analysis_output_queue, storage_file, self._filter_expression, + self.preferred_encoding) + + analysis_queue_consumer.ConsumeItems() + + if analysis_queue_consumer.tags: + storage_file.StoreTagging(analysis_queue_consumer.tags) + + # TODO: analysis_queue_consumer.anomalies: + + for item, value in analysis_queue_consumer.counter.iteritems(): + counter[item] = value + + if self._filter_object and not counter['Limited By']: + counter['Filter By Date'] = ( + counter['Stored Events'] - counter['Events Included'] - + counter['Events Filtered Out']) + + return counter + + +# TODO: Function: _ConsumeItem is not defined, inspect if we need to define it +# or change the interface so that is not an abstract method. +# TODO: Remove this after dfVFS integration. 
+# pylint: disable=abstract-method +class PsortAnalysisReportQueueConsumer(queue.ItemQueueConsumer): + """Class that implements an analysis report queue consumer for psort.""" + + def __init__( + self, queue_object, storage_file, filter_string, preferred_encoding): + """Initializes the queue consumer. + + Args: + queue_object: the queue object (instance of Queue). + storage_file: the storage file (instance of StorageFile). + filter_string: the filter string. + preferred_encoding: the preferred encoding. + """ + super(PsortAnalysisReportQueueConsumer, self).__init__(queue_object) + self._filter_string = filter_string + self._preferred_encoding = preferred_encoding + self._storage_file = storage_file + self.anomalies = [] + self.counter = collections.Counter() + self.tags = [] + + def _ConsumeItem(self, analysis_report): + """Consumes an item callback for ConsumeItems. + + Args: + analysis_report: the analysis report (instance of AnalysisReport). + """ + self.counter['Total Reports'] += 1 + self.counter[u'Report: {0:s}'.format(analysis_report.plugin_name)] += 1 + + self.anomalies.extend(analysis_report.GetAnomalies()) + self.tags.extend(analysis_report.GetTags()) + + if self._filter_string: + analysis_report.filter_string = self._filter_string + + # For now we print the report to disk and then save it. + # TODO: Have the option of saving to a separate file and + # do something more here, for instance saving into a HTML + # file, or something else (including potential images). + self._storage_file.StoreReport(analysis_report) + + report_string = analysis_report.GetString() + try: + print report_string.encode(self._preferred_encoding) + except UnicodeDecodeError: + logging.error( + u'Unable to print report due to an unicode decode error. ' + u'The report is stored inside the storage file and can be ' + u'viewed using pinfo [if unable to view please submit a ' + u'bug report https://github.com/log2timeline/plaso/issues') + + +def _AppendEvent(event_object, output_buffer, event_queues): + """Appends an event object to an output buffer and queues. + + Args: + event_object: an event object (instance of EventObject). + output_buffer: the output buffer. + event_queues: a list of event queues that serve as input for + the analysis plugins. + """ + output_buffer.Append(event_object) + + # Needed due to duplicate removals, if two events + # are merged then we'll just pick the first inode value. + inode = getattr(event_object, 'inode', None) + if isinstance(inode, basestring): + inode_list = inode.split(';') + try: + new_inode = int(inode_list[0], 10) + except (ValueError, IndexError): + new_inode = 0 + + event_object.inode = new_inode + + for event_queue in event_queues: + event_queue.ProduceItem(event_object) + + +def ProcessOutput( + output_buffer, output_module, my_filter=None, filter_buffer=None, + analysis_queues=None): + """Fetch EventObjects from storage and process and filter them. + + Args: + output_buffer: output.EventBuffer object. + output_module: The output module (instance of OutputFormatter). + my_filter: A filter object. + filter_buffer: A filter buffer used to store previously discarded + events to store time slice history. + analysis_queues: A list of analysis queues. 
+ """ + counter = collections.Counter() + my_limit = getattr(my_filter, 'limit', 0) + forward_entries = 0 + if not analysis_queues: + analysis_queues = [] + + event_object = output_module.FetchEntry() + while event_object: + if my_filter: + event_match = event_object + if isinstance(event_object, plaso_storage_pb2.EventObject): + # TODO: move serialization to storage, if low-level filtering is needed + # storage should provide functions for it. + serializer = protobuf_serializer.ProtobufEventObjectSerializer + event_match = serializer.ReadSerialized(event_object) + + if my_filter.Match(event_match): + counter['Events Included'] += 1 + if filter_buffer: + # Indicate we want forward buffering. + forward_entries = 1 + # Empty the buffer. + for event_in_buffer in filter_buffer.Flush(): + counter['Events Added From Slice'] += 1 + counter['Events Included'] += 1 + counter['Events Filtered Out'] -= 1 + _AppendEvent(event_in_buffer, output_buffer, analysis_queues) + _AppendEvent(event_object, output_buffer, analysis_queues) + if my_limit: + if counter['Events Included'] == my_limit: + break + else: + if filter_buffer and forward_entries: + if forward_entries <= filter_buffer.size: + _AppendEvent(event_object, output_buffer, analysis_queues) + forward_entries += 1 + counter['Events Added From Slice'] += 1 + counter['Events Included'] += 1 + else: + # Reached the max, don't include other entries. + forward_entries = 0 + counter['Events Filtered Out'] += 1 + elif filter_buffer: + filter_buffer.Append(event_object) + counter['Events Filtered Out'] += 1 + else: + counter['Events Filtered Out'] += 1 + else: + counter['Events Included'] += 1 + _AppendEvent(event_object, output_buffer, analysis_queues) + + event_object = output_module.FetchEntry() + + if output_buffer.duplicate_counter: + counter['Duplicate Removals'] = output_buffer.duplicate_counter + + if my_limit: + counter['Limited By'] = my_limit + return counter + + +def Main(arguments=None): + """Start the tool.""" + multiprocessing.freeze_support() + + front_end = PsortFrontend() + + arg_parser = argparse.ArgumentParser( + description=( + u'PSORT - Application to read, filter and process ' + u'output from a plaso storage file.'), add_help=False) + + tool_group = arg_parser.add_argument_group('Optional Arguments For Psort') + output_group = arg_parser.add_argument_group( + 'Optional Arguments For Output Modules') + analysis_group = arg_parser.add_argument_group( + 'Optional Arguments For Analysis Modules') + + tool_group.add_argument( + '-d', '--debug', action='store_true', dest='debug', default=False, + help='Fall back to debug shell if psort fails.') + + tool_group.add_argument( + '-q', '--quiet', action='store_true', dest='quiet', default=False, + help='Don\'t print out counter information after processing.') + + tool_group.add_argument( + '-h', '--help', action='help', help='Show this help message and exit.') + + tool_group.add_argument( + '-a', '--include_all', action='store_false', dest='dedup', default=True, + help=( + 'By default the tool removes duplicate entries from the output. 
' + 'This parameter changes that behavior so all events are included.')) + + tool_group.add_argument( + '-o', '--output_format', '--output-format', metavar='FORMAT', + dest='output_format', default='dynamic', help=( + 'The output format or "-o list" to see a list of available ' + 'output formats.')) + + tool_group.add_argument( + '--analysis', metavar='PLUGIN_LIST', dest='analysis_plugins', + default='', action='store', type=unicode, help=( + 'A comma separated list of analysis plugin names to be loaded ' + 'or "--analysis list" to see a list of available plugins.')) + + tool_group.add_argument( + '-z', '--zone', metavar='TIMEZONE', default='UTC', dest='timezone', help=( + 'The timezone of the output or "-z list" to see a list of available ' + 'timezones.')) + + tool_group.add_argument( + '-w', '--write', metavar='OUTPUTFILE', dest='write', + help='Output filename. Defaults to stdout.') + + tool_group.add_argument( + '--slice', metavar='DATE', dest='slice', type=str, + default='', action='store', help=( + 'Create a time slice around a certain date. This parameter, if ' + 'defined will display all events that happened X minutes before and ' + 'after the defined date. X is controlled by the parameter ' + '--slice_size but defaults to 5 minutes.')) + + tool_group.add_argument( + '--slicer', dest='slicer', action='store_true', default=False, help=( + 'Create a time slice around every filter match. This parameter, if ' + 'defined will save all X events before and after a filter match has ' + 'been made. X is defined by the --slice_size parameter.')) + + tool_group.add_argument( + '--slice_size', dest='slice_size', type=int, default=5, action='store', + help=( + 'Defines the slice size. In the case of a regular time slice it ' + 'defines the number of minutes the slice size should be. In the ' + 'case of the --slicer it determines the number of events before ' + 'and after a filter match has been made that will be included in ' + 'the result set. The default value is 5]. See --slice or --slicer ' + 'for more details about this option.')) + + tool_group.add_argument( + '-v', '--version', dest='version', action='version', + version='log2timeline - psort version {0:s}'.format(plaso.GetVersion()), + help='Show the current version of psort.') + + front_end.AddStorageFileOptions(tool_group) + + tool_group.add_argument( + 'filter', nargs='?', action='store', metavar='FILTER', default=None, + type=unicode, help=( + 'A filter that can be used to filter the dataset before it ' + 'is written into storage. More information about the filters' + ' and it\'s usage can be found here: http://plaso.kiddaland.' + 'net/usage/filters')) + + if arguments is None: + arguments = sys.argv[1:] + + # Add the output module options. + if '-o' in arguments: + argument_index = arguments.index('-o') + 1 + elif '--output_format' in arguments: + argument_index = arguments.index('--output_format') + 1 + elif '--output-format' in arguments: + argument_index = arguments.index('--output-format') + 1 + else: + argument_index = 0 + + if argument_index > 0: + module_names = arguments[argument_index] + front_end.AddOutputModuleOptions(output_group, [module_names]) + + # Add the analysis plugin options. + if '--analysis' in arguments: + argument_index = arguments.index('--analysis') + 1 + + # Get the names of the analysis plugins that should be loaded. 
+ plugin_names = arguments[argument_index] + try: + front_end.AddAnalysisPluginOptions(analysis_group, plugin_names) + except errors.BadConfigOption as exception: + arg_parser.print_help() + print u'' + logging.error('{0:s}'.format(exception)) + return False + + options = arg_parser.parse_args(args=arguments) + + format_str = '[%(levelname)s] %(message)s' + if getattr(options, 'debug', False): + logging.basicConfig(level=logging.DEBUG, format=format_str) + else: + logging.basicConfig(level=logging.INFO, format=format_str) + + if options.timezone == 'list': + front_end.ListTimeZones() + return True + + if options.analysis_plugins == 'list': + front_end.ListAnalysisPlugins() + return True + + if options.output_format == 'list': + front_end.ListOutputModules() + return True + + try: + front_end.ParseOptions(options) + except errors.BadConfigOption as exception: + arg_parser.print_help() + print u'' + logging.error(u'{0:s}'.format(exception)) + return False + + if front_end.preferred_encoding == 'ascii': + logging.warning( + u'The preferred encoding of your system is ASCII, which is not optimal ' + u'for the typically non-ASCII characters that need to be parsed and ' + u'processed. The tool will most likely crash and die, perhaps in a way ' + u'that may not be recoverable. A five second delay is introduced to ' + u'give you time to cancel the runtime and reconfigure your preferred ' + u'encoding, otherwise continue at own risk.') + time.sleep(5) + + try: + counter = front_end.ParseStorage(options) + + if not options.quiet: + logging.info(frontend_utils.FormatHeader('Counter')) + for element, count in counter.most_common(): + logging.info(frontend_utils.FormatOutputString(element, count)) + + except IOError as exception: + # Piping results to "|head" for instance causes an IOError. + if u'Broken pipe' not in exception: + logging.error(u'Processing stopped early: {0:s}.'.format(exception)) + + except KeyboardInterrupt: + pass + + # Catching every remaining exception in case we are debugging. + except Exception as exception: + if not options.debug: + raise + logging.error(u'{0:s}'.format(exception)) + pdb.post_mortem() + + return True + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/plaso/frontend/psort_test.py b/plaso/frontend/psort_test.py new file mode 100644 index 0000000..9353a21 --- /dev/null +++ b/plaso/frontend/psort_test.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
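+# A note on the --slice handling in PsortFrontend.ParseStorage above: the
+# slice size in minutes is converted to microseconds (the native plaso
+# timestamp resolution) and applied as both the lower and upper offset
+# around the supplied date. A minimal sketch of that arithmetic, using a
+# plain integer timestamp instead of the pfilter.TimeRangeCache plumbing;
+# the function name is illustrative, not part of the module:
+def _ExampleSliceWindow(timestamp, slice_size_minutes=5):
+  """Returns the (lower, upper) timestamp bounds of a --slice window."""
+  # Convert number of minutes to microseconds, as ParseStorage does.
+  range_operator = slice_size_minutes * 60 * 1000000
+  return timestamp - range_operator, timestamp + range_operator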
+"""Tests for the psort front-end.""" + +import os +import StringIO +import unittest + +from plaso.formatters import interface as formatters_interface +from plaso.formatters import manager as formatters_manager +from plaso.frontend import psort +from plaso.frontend import test_lib +from plaso.lib import event +from plaso.lib import output +from plaso.lib import pfilter +from plaso.lib import storage +from plaso.lib import timelib_test + + +class TestEvent1(event.EventObject): + DATA_TYPE = 'test:psort:1' + + def __init__(self): + super(TestEvent1, self).__init__() + self.timestamp = 123456 + + +class TestEvent2(event.EventObject): + DATA_TYPE = 'test:psort:2' + + def __init__(self, timestamp): + super(TestEvent2, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = 'Last Written' + + self.parser = 'TestEvent' + + self.display_name = '/dev/none' + self.filename = '/dev/none' + self.some = u'My text dude.' + self.var = {'Issue': False, 'Closed': True} + + +class TestEvent2Formatter(formatters_interface.EventFormatter): + DATA_TYPE = 'test:psort:2' + + FORMAT_STRING = 'My text goes along: {some} lines' + + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'None in Particular' + + +class TestFormatter(output.LogOutputFormatter): + """Dummy formatter.""" + + def FetchEntry(self, store_number=-1, store_index=-1): + return self.store.GetSortedEntry() + + def Start(self): + self.filehandle.write(( + 'date,time,timezone,MACB,source,sourcetype,type,user,host,' + 'short,desc,version,filename,inode,notes,format,extra\n')) + + def EventBody(self, event_object): + """Writes the event body. + + Args: + event_object: The event object (instance of EventObject). + """ + event_formatter = formatters_manager.EventFormatterManager.GetFormatter( + event_object) + msg, _ = event_formatter.GetMessages(event_object) + source_short, source_long = event_formatter.GetSources(event_object) + self.filehandle.write(u'{0:s}/{1:s} {2:s}\n'.format( + source_short, source_long, msg)) + + +class TestEventBuffer(output.EventBuffer): + """A test event buffer.""" + + def __init__(self, store, formatter=None): + self.record_count = 0 + self.store = store + if not formatter: + formatter = TestFormatter(store) + super(TestEventBuffer, self).__init__(formatter, False) + + def Append(self, event_object): + self._buffer_dict[event_object.EqualityString()] = event_object + self.record_count += 1 + + def Flush(self): + for event_object_key in self._buffer_dict: + self.formatter.EventBody(self._buffer_dict[event_object_key]) + self._buffer_dict = {} + + def End(self): + pass + + +class PsortFrontendTest(test_lib.FrontendTestCase): + """Tests for the psort front-end.""" + + def setUp(self): + """Setup sets parameters that will be reused throughout this test.""" + self._front_end = psort.PsortFrontend() + + # TODO: have sample output generated from the test. 
+ self._test_file = os.path.join(self._TEST_DATA_PATH, 'psort_test.out') + self.first = timelib_test.CopyStringToTimestamp('2012-07-24 21:45:24') + self.last = timelib_test.CopyStringToTimestamp('2016-11-18 01:15:43') + + def testReadEntries(self): + """Ensure returned EventObjects from the storage are within timebounds.""" + timestamp_list = [] + pfilter.TimeRangeCache.ResetTimeConstraints() + pfilter.TimeRangeCache.SetUpperTimestamp(self.last) + pfilter.TimeRangeCache.SetLowerTimestamp(self.first) + + storage_file = storage.StorageFile(self._test_file, read_only=True) + storage_file.SetStoreLimit() + + event_object = storage_file.GetSortedEntry() + while event_object: + timestamp_list.append(event_object.timestamp) + event_object = storage_file.GetSortedEntry() + + self.assertEquals(len(timestamp_list), 8) + self.assertTrue( + timestamp_list[0] >= self.first and timestamp_list[-1] <= self.last) + + storage_file.Close() + + def testOutput(self): + """Testing if psort can output data.""" + events = [] + events.append(TestEvent2(5134324321)) + events.append(TestEvent2(2134324321)) + events.append(TestEvent2(9134324321)) + events.append(TestEvent2(15134324321)) + events.append(TestEvent2(5134324322)) + events.append(TestEvent2(5134024321)) + + output_fd = StringIO.StringIO() + + with test_lib.TempDirectory() as dirname: + temp_file = os.path.join(dirname, 'plaso.db') + + storage_file = storage.StorageFile(temp_file, read_only=False) + pfilter.TimeRangeCache.ResetTimeConstraints() + storage_file.SetStoreLimit() + storage_file.AddEventObjects(events) + storage_file.Close() + + storage_file = storage.StorageFile(temp_file) + with storage_file: + storage_file.store_range = [1] + formatter = TestFormatter(storage_file, output_fd) + event_buffer = TestEventBuffer(storage_file, formatter) + + psort.ProcessOutput(event_buffer, formatter, None) + + event_buffer.Flush() + lines = [] + for line in output_fd.getvalue().split('\n'): + if line == '.': + continue + if line: + lines.append(line) + + # One more line than events (header row). + self.assertEquals(len(lines), 7) + self.assertTrue('My text goes along: My text dude. lines' in lines[2]) + self.assertTrue('LOG/' in lines[2]) + self.assertTrue('None in Particular' in lines[2]) + self.assertEquals(lines[0], ( + 'date,time,timezone,MACB,source,sourcetype,type,user,host,short,desc,' + 'version,filename,inode,notes,format,extra')) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/frontend/test_lib.py b/plaso/frontend/test_lib.py new file mode 100644 index 0000000..0ef9f52 --- /dev/null +++ b/plaso/frontend/test_lib.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
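+# PsortFrontendTest.testOutput above follows a common pattern for front-end
+# tests: write event objects into a fresh storage file inside a throwaway
+# directory, close it, then reopen it for the code under test. A condensed
+# sketch of that round trip, using the TempDirectory helper defined below;
+# the function name is illustrative, not part of the module:
+def _ExampleStorageRoundTrip(events):
+  """Writes events to a temporary storage file and reads them back sorted."""
+  import os
+  from plaso.lib import storage
+
+  timestamps = []
+  with TempDirectory() as dirname:
+    temp_file = os.path.join(dirname, 'plaso.db')
+    storage_file = storage.StorageFile(temp_file, read_only=False)
+    storage_file.AddEventObjects(events)
+    storage_file.Close()
+
+    # The tests also reset pfilter.TimeRangeCache constraints before reading.
+    storage_file = storage.StorageFile(temp_file, read_only=True)
+    with storage_file:
+      storage_file.SetStoreLimit()
+      event_object = storage_file.GetSortedEntry()
+      while event_object:
+        timestamps.append(event_object.timestamp)
+        event_object = storage_file.GetSortedEntry()
+  return timestamps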
+"""Front-end related functions and classes for testing.""" + +import os +import shutil +import tempfile +import unittest + + +class Options(object): + """A simple configuration object.""" + + +class TempDirectory(object): + """A self cleaning temporary directory.""" + + def __init__(self): + """Initializes the temporary directory.""" + super(TempDirectory, self).__init__() + self.name = u'' + + def __enter__(self): + """Make this work with the 'with' statement.""" + self.name = tempfile.mkdtemp() + return self.name + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make this work with the 'with' statement.""" + shutil.rmtree(self.name, True) + + +class FrontendTestCase(unittest.TestCase): + """The unit test case for a front-end.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + maxDiff = None + + def _GetTestFilePath(self, path_segments): + """Retrieves the path of a test file relative to the test data directory. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A path of the test file. + """ + # Note that we need to pass the individual path segments to os.path.join + # and not a list. + return os.path.join(self._TEST_DATA_PATH, *path_segments) diff --git a/plaso/frontend/utils.py b/plaso/frontend/utils.py new file mode 100644 index 0000000..8686392 --- /dev/null +++ b/plaso/frontend/utils.py @@ -0,0 +1,212 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Frontend utility classes and functions.""" + +import binascii +import tempfile +import os + +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.lib import timelib + + +# TODO: add tests for the functions in this class. +class OutputWriter(object): + """Class that defines output writing methods for the frontends and tools.""" + + DATA_BUFFER_SIZE = 32768 + + @classmethod + def GetDateTimeString(cls, timestamp): + """Returns a human readable date and time string in the ISO 8601 format.""" + return timelib.Timestamp.CopyToIsoFormat(timestamp) + + @classmethod + def GetEventDataHexDump(cls, event_object, before=0, length=20): + """Returns a hexadecimal representation of the event data. + + This function creates a hexadecimal string representation based on + the event data described by the event object. + + Args: + event_object: The event object (instance of EventObject). + before: Optional number of bytes to include in the output before + the event. The default is none. + length: Optional number of lines to include in the output. + The default is 20. + + Returns: + A string that contains the hexadecimal representation of the event data. + """ + if not event_object: + return u'Missing event object.' 
+ + if not hasattr(event_object, 'pathspec'): + return u'Event object has no path specification.' + + try: + file_entry = path_spec_resolver.Resolver.OpenFileEntry( + event_object.pathspec) + except IOError as exception: + return u'Unable to open file with error: {0:s}'.format(exception) + + offset = getattr(event_object, 'offset', 0) + if offset - before > 0: + offset -= before + + file_object = file_entry.GetFileObject() + file_object.seek(offset, os.SEEK_SET) + data = file_object.read(int(length) * 16) + file_object.close() + + return cls.GetHexDump(data, offset) + + @classmethod + def GetHexDump(cls, data, offset=0): + """Returns a hexadecimal representation of the contents of a binary string. + + All ASCII characters in the hexadecimal representation (hexdump) are + translated back to their character representation. + + Args: + data: The binary string. + offset: An optional start point in bytes where the data lies, for + presentation purposes. + + Returns: + A string that contains the hexadecimal representation of the binary + string. + """ + hexdata = binascii.hexlify(data) + output_strings = [] + # Note that the // statement is a Python specific method of ensuring + # an integer division. + hexdata_length = len(hexdata) + lines_of_hexdata = hexdata_length // 32 + + line_number = 0 + point = 0 + while line_number < lines_of_hexdata: + line_of_hexdata = hexdata[point:point + 32] + output_strings.append( + cls.GetHexDumpLine(line_of_hexdata, offset, line_number)) + hexdata_length -= 32 + line_number += 1 + point += 32 + + if hexdata_length > 0: + line_of_hexdata = '{0:s}{1:s}'.format( + hexdata[point:], ' ' * (32 - hexdata_length)) + output_strings.append( + cls.GetHexDumpLine(line_of_hexdata, offset, line_number)) + + return '\n'.join(output_strings) + + @classmethod + def GetHexDumpLine(cls, line, orig_ofs, entry_nr=0): + """Returns a single line of 'xxd'-like hexadecimal representation.""" + output_strings = [] + output_strings.append('{0:07x}: '.format(orig_ofs + entry_nr * 16)) + + for bit in range(0, 8): + output_strings.append('{0:s} '.format(line[bit * 4:bit * 4 + 4])) + + for bit in range(0, 16): + try: + data = binascii.unhexlify(line[bit * 2: bit * 2 + 2]) + except TypeError: + data = '.' + + if ord(data) > 31 and ord(data) < 128: + output_strings.append(data) + else: + output_strings.append('.') + + return ''.join(output_strings) + + @classmethod + def WriteFile(cls, input_file_object, output_path=None): + """Writes the data of a file-like object to a "regular" file. + + Args: + input_file_object: the input file-like object. + output_path: the path of the output path. The default is None which will + write the data to a temporary file. + + Returns: + The path of the output file. 
+ """ + if output_path: + output_file_object = open(output_path, 'wb') + else: + output_file_object = tempfile.NamedTemporaryFile() + output_path = output_file_object.name + + input_file_object.seek(0, os.SEEK_SET) + data = input_file_object.read(cls.DATA_BUFFER_SIZE) + while data: + output_file_object.write(data) + data = input_file_object.read(cls.DATA_BUFFER_SIZE) + + output_file_object.close() + return output_path + + +def FormatHeader(header, char='*'): + """Formats the header as a line of 80 chars with the header text centered.""" + format_string = '\n{{0:{0:s}^80}}'.format(char) + return format_string.format(u' {0:s} '.format(header)) + + +def FormatOutputString(name, description, col_length=25): + """Return a formatted string ready for output.""" + max_width = 80 + line_length = max_width - col_length - 3 + + # TODO: add an explanation what this code is doing. + fmt = u'{{:>{0:d}s}} : {{}}'.format(col_length) + fmt_second = u'{{:<{0:d}}}{{}}'.format(col_length + 3) + + description = unicode(description) + if len(description) < line_length: + return fmt.format(name, description) + + # Split each word up in the description. + words = description.split() + + current = 0 + + lines = [] + word_buffer = [] + for word in words: + current += len(word) + 1 + if current >= line_length: + current = len(word) + lines.append(u' '.join(word_buffer)) + word_buffer = [word] + else: + word_buffer.append(word) + lines.append(u' '.join(word_buffer)) + + ret = [] + ret.append(fmt.format(name, lines[0])) + for line in lines[1:]: + ret.append(fmt_second.format('', line)) + + return u'\n'.join(ret) diff --git a/plaso/lib/__init__.py b/plaso/lib/__init__.py new file mode 100644 index 0000000..0c8696c --- /dev/null +++ b/plaso/lib/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/lib/binary.py b/plaso/lib/binary.py new file mode 100644 index 0000000..eeef22b --- /dev/null +++ b/plaso/lib/binary.py @@ -0,0 +1,280 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
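+# A note on FormatOutputString above (its body carries a TODO asking for an
+# explanation): it builds two format strings, one that right-aligns the name
+# in a col_length character column followed by ' : ' and the first line of
+# the description, and one that indents wrapped continuation lines by
+# col_length + 3 spaces so they line up under the description column. A
+# short usage sketch; the function name is illustrative, not part of the
+# module:
+def _ExamplePrintCounter():
+  """Prints a banner and two name/value rows like the psort counter output."""
+  from plaso.frontend import utils as frontend_utils
+
+  # An 80 character line with ' Counter ' centered between '*' characters.
+  print(frontend_utils.FormatHeader('Counter'))
+  # Right-aligned names in a 25 character column, values after ' : '.
+  print(frontend_utils.FormatOutputString('Stored Events', 143))
+  print(frontend_utils.FormatOutputString('Events Included', 98))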
+"""This file contains a helper library to read binary files.""" + +import binascii +import logging +import os + + +def ByteArrayCopyToString(byte_array, codepage='utf-8'): + """Copies a UTF-8 encoded byte array into a Unicode string. + + Args: + byte_array: A byte array containing an UTF-8 encoded string. + codepage: The codepage of the byte stream. The default is utf-8. + + Returns: + A Unicode string. + """ + byte_stream = ''.join(map(chr, byte_array)) + return ByteStreamCopyToString(byte_stream, codepage=codepage) + + +def ByteStreamCopyToString(byte_stream, codepage='utf-8'): + """Copies a UTF-8 encoded byte stream into a Unicode string. + + Args: + byte_stream: A byte stream containing an UTF-8 encoded string. + codepage: The codepage of the byte stream. The default is utf-8. + + Returns: + A Unicode string. + """ + try: + string = byte_stream.decode(codepage) + except UnicodeDecodeError: + logging.warning( + u'Unable to decode {0:s} formatted byte stream.'.format(codepage)) + string = byte_stream.decode(codepage, errors='ignore') + + string, _, _ = string.partition('\x00') + return string + + +def ByteStreamCopyToGuid(byte_stream, byte_order='little-endian'): + """Reads a GUID from the byte stream. + + Args: + byte_stream: The byte stream that contains the UTF-16 formatted stream. + byte_order: The byte order, either big- or little-endian. The default is + little-endian. + + Returns: + String containing the GUID. + """ + if len(byte_stream) >= 16: + if byte_order == 'big-endian': + return ( + u'{{{0:02x}{1:02x}{2:02x}{3:02x}-{4:02x}{5:02x}-' + u'{6:02x}{7:02x}-{8:02x}{9:02x}-' + u'{10:02x}{11:02x}{12:02x}{13:02x}{14:02x}{15:02x}}}').format( + *byte_stream[:16]) + elif byte_order == 'little-endian': + return ( + u'{{{3:02x}{2:02x}{1:02x}{0:02x}-{5:02x}{4:02x}-' + u'{7:02x}{6:02x}-{8:02x}{9:02x}-' + u'{10:02x}{11:02x}{12:02x}{13:02x}{14:02x}{15:02x}}}').format( + *byte_stream[:16]) + return u'' + + +def ByteStreamCopyToUtf16Stream(byte_stream, byte_stream_size=None): + """Reads an UTF-16 formatted stream from a byte stream. + + The UTF-16 formatted stream should be terminated by an end-of-string + character (\x00\x00). Otherwise the function reads up to the byte stream size. + + Args: + byte_stream: The byte stream that contains the UTF-16 formatted stream. + byte_stream_size: Optional byte stream size or None if the entire + byte stream should be read. The default is None. + + Returns: + String containing the UTF-16 formatted stream. + """ + byte_stream_index = 0 + if not byte_stream_size: + byte_stream_size = len(byte_stream) + + while byte_stream_index + 1 < byte_stream_size: + if (byte_stream[byte_stream_index] == '\x00' and + byte_stream[byte_stream_index + 1] == '\x00'): + break + + byte_stream_index += 2 + + return byte_stream[0:byte_stream_index] + + +def ReadUtf16Stream(file_object, offset=None, byte_size=0): + """Reads an UTF-16 formatted stream from a file-like object. + + Reads an UTF-16 formatted stream that's terminated by + an end-of-string character (\x00\x00) or upto the byte size. + + Args: + file_object: A file-like object to read the data from. + offset: An offset into the file object data, if -1 or not set + the current location into the file object data is used. + byte_size: Maximum number of bytes to read or 0 if the function + should keep reading upto the end of file. + + Returns: + An Unicode string. 
+ """ + if offset is not None: + file_object.seek(offset, os.SEEK_SET) + + char_buffer = [] + + stream_index = 0 + char_raw = file_object.read(2) + while char_raw: + if byte_size and stream_index >= byte_size: + break + + if '\x00\x00' in char_raw: + break + char_buffer.append(char_raw) + stream_index += 2 + char_raw = file_object.read(2) + + return ReadUtf16(''.join(char_buffer)) + + +def Ut16StreamCopyToString(byte_stream, byte_stream_size=None): + """Copies an UTF-16 formatted byte stream to a string. + + The UTF-16 formatted byte stream should be terminated by an end-of-string + character (\x00\x00). Otherwise the function reads up to the byte stream size. + + Args: + byte_stream: The UTF-16 formatted byte stream. + byte_stream_size: The byte stream size or None if the entire byte stream + should be used. + + Returns: + An Unicode string. + """ + utf16_stream = ByteStreamCopyToUtf16Stream( + byte_stream, byte_stream_size=byte_stream_size) + + try: + return utf16_stream.decode('utf-16-le') + except (UnicodeDecodeError, UnicodeEncodeError) as exception: + logging.error(u'Unable to decode string: {0:s} with error: {1:s}'.format( + HexifyBuffer(utf16_stream), exception)) + + return utf16_stream.decode('utf-16-le', errors='ignore') + + +def ArrayOfUt16StreamCopyToString(byte_stream, byte_stream_size=None): + """Copies an array of UTF-16 formatted byte streams to an array of strings. + + The UTF-16 formatted byte stream should be terminated by an end-of-string + character (\x00\x00). Otherwise the function reads upto the byte stream size. + + Args: + byte_stream: The UTF-16 formatted byte stream. + byte_stream_size: The byte stream size or None if the entire byte stream + should be used. + + Returns: + An array of Unicode strings. + """ + array_of_strings = [] + utf16_stream_start = 0 + byte_stream_index = 0 + if not byte_stream_size: + byte_stream_size = len(byte_stream) + + while byte_stream_index + 1 < byte_stream_size: + if (byte_stream[byte_stream_index] == '\x00' and + byte_stream[byte_stream_index + 1] == '\x00'): + + if byte_stream_index - utf16_stream_start <= 2: + break + + array_of_strings.append( + byte_stream[utf16_stream_start:byte_stream_index].decode( + 'utf-16-le')) + utf16_stream_start = byte_stream_index + 2 + + byte_stream_index += 2 + + return array_of_strings + + +def ArrayOfUt16StreamCopyToStringTable(byte_stream, byte_stream_size=None): + """Copies an array of UTF-16 formatted byte streams to a string table. + + The string table is a dict of strings with the byte offset as their key. + The UTF-16 formatted byte stream should be terminated by an end-of-string + character (\x00\x00). Otherwise the function reads upto the byte stream size. + + Args: + byte_stream: The UTF-16 formatted byte stream. + byte_stream_size: The byte stream size or None if the entire byte stream + should be used. + + Returns: + A dict of Unicode strings with the byte offset as their key. 
+ """ + string_table = {} + utf16_stream_start = 0 + byte_stream_index = 0 + if not byte_stream_size: + byte_stream_size = len(byte_stream) + + while byte_stream_index + 1 < byte_stream_size: + if (byte_stream[byte_stream_index] == '\x00' and + byte_stream[byte_stream_index + 1] == '\x00'): + + if byte_stream_index - utf16_stream_start <= 2: + break + + string = byte_stream[utf16_stream_start:byte_stream_index].decode( + 'utf-16-le') + string_table[utf16_stream_start] = string + utf16_stream_start = byte_stream_index + 2 + + byte_stream_index += 2 + + return string_table + + +def ReadUtf16(string_buffer): + """Returns a decoded UTF-16 string from a string buffer.""" + if type(string_buffer) in (list, tuple): + use_buffer = u''.join(string_buffer) + else: + use_buffer = string_buffer + + if not type(use_buffer) in (str, unicode): + return u'' + + try: + return use_buffer.decode('utf-16').replace('\x00', '') + except SyntaxError as exception: + logging.error(u'Unable to decode string: {0:s} with error: {1:s}.'.format( + HexifyBuffer(string_buffer), exception)) + except (UnicodeDecodeError, UnicodeEncodeError) as exception: + logging.error(u'Unable to decode string: {0:s} with error: {1:s}'.format( + HexifyBuffer(string_buffer), exception)) + + return use_buffer.decode('utf-16', errors='ignore').replace('\x00', '') + + +def HexifyBuffer(string_buffer): + """Return a string with the hex representation of a string buffer.""" + chars = [] + for char in string_buffer: + chars.append(binascii.hexlify(char)) + + return u'\\x{0:s}'.format(u'\\x'.join(chars)) diff --git a/plaso/lib/binary_test.py b/plaso/lib/binary_test.py new file mode 100644 index 0000000..fce4f06 --- /dev/null +++ b/plaso/lib/binary_test.py @@ -0,0 +1,206 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a unit test for the binary helper in Plaso.""" +import os +import unittest + +from plaso.lib import binary + + +class BinaryTests(unittest.TestCase): + """A unit test for the binary helper functions.""" + + def setUp(self): + """Set up the needed variables used througout.""" + # String: "þrándur" - uses surrogate pairs to test four byte character + # decoding. + self._unicode_string_1 = ( + '\xff\xfe\xfe\x00\x72\x00\xe1\x00\x6E\x00\x64\x00\x75\x00\x72\x00') + + # String: "What\x00is". + self._ascii_string_1 = ( + '\x57\x00\x68\x00\x61\x00\x74\x00\x00\x00\x69\x00\x73\x00') + + # String: "What is this?". + self._ascii_string_2 = ( + '\x57\x00\x68\x00\x61\x00\x74\x00\x20\x00\x69\x00\x73\x00' + '\x20\x00\x74\x00\x68\x00\x69\x00\x73\x00\x3F\x00') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. 
+ self.maxDiff = None + + def testReadUtf16Stream(self): + """Test reading an UTF-16 stream from a file-like object.""" + path = os.path.join('test_data', 'PING.EXE-B29F6629.pf') + with open(path, 'rb') as fh: + # Read a null char terminated string. + fh.seek(0x10) + self.assertEquals(binary.ReadUtf16Stream(fh), 'PING.EXE') + + # Read a fixed size string. + fh.seek(0x27f8) + expected_string = u'\\DEVICE\\HARDDISKVOLUME' + string = binary.ReadUtf16Stream(fh, byte_size=44) + self.assertEquals(string, expected_string) + + fh.seek(0x27f8) + expected_string = u'\\DEVICE\\HARDDISKVOLUME1' + string = binary.ReadUtf16Stream(fh, byte_size=46) + self.assertEquals(string, expected_string) + + # Read another null char terminated string. + fh.seek(7236) + self.assertEquals( + binary.ReadUtf16Stream(fh), + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL') + + def testUt16StreamCopyToString(self): + """Test copying an UTF-16 byte stream to a string.""" + path = os.path.join('test_data', 'PING.EXE-B29F6629.pf') + with open(path, 'rb') as fh: + byte_stream = fh.read() + + # Read a null char terminated string. + self.assertEquals( + binary.Ut16StreamCopyToString(byte_stream[0x10:]), 'PING.EXE') + + # Read a fixed size string. + expected_string = u'\\DEVICE\\HARDDISKVOLUME' + string = binary.Ut16StreamCopyToString( + byte_stream[0x27f8:], byte_stream_size=44) + self.assertEquals(string, expected_string) + + expected_string = u'\\DEVICE\\HARDDISKVOLUME1' + string = binary.Ut16StreamCopyToString( + byte_stream[0x27f8:], byte_stream_size=46) + self.assertEquals(string, expected_string) + + # Read another null char terminated string. + expected_string = ( + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL') + + string = binary.Ut16StreamCopyToString(byte_stream[7236:]) + self.assertEquals(string, expected_string) + + def testArrayOfUt16StreamCopyToString(self): + """Test copying an array of UTF-16 byte streams to strings.""" + path = os.path.join('test_data', 'PING.EXE-B29F6629.pf') + with open(path, 'rb') as fh: + byte_stream = fh.read() + + strings_array = binary.ArrayOfUt16StreamCopyToString( + byte_stream[0x1c44:], byte_stream_size=2876) + expected_strings_array = [ + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNEL32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\APISETSCHEMA.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNELBASE.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LOCALE.NLS', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\PING.EXE', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\ADVAPI32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSVCRT.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SECHOST.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\RPCRT4.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IPHLPAPI.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NSI.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WINNSI.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USER32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\GDI32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LPK.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USP10.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WS2_32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IMM32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSCTF.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\EN-US\\PING.EXE.MUI', + 
u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\GLOBALIZATION\\SORTING\\' + u'SORTDEFAULT.NLS', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSWSOCK.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHQOS.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHTCPIP.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHIP6.DLL'] + + self.assertEquals(strings_array, expected_strings_array) + + def testArrayOfUt16StreamCopyToStringTable(self): + """Test copying an array of UTF-16 byte streams to a string table.""" + path = os.path.join('test_data', 'PING.EXE-B29F6629.pf') + with open(path, 'rb') as fh: + byte_stream = fh.read() + + string_table = binary.ArrayOfUt16StreamCopyToStringTable( + byte_stream[0x1c44:], byte_stream_size=2876) + expected_string_table = { + 0: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL', + 102: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNEL32.DLL', + 210: (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\' + u'APISETSCHEMA.DLL'), + 326: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNELBASE.DLL', + 438: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LOCALE.NLS', + 542: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\PING.EXE', + 642: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\ADVAPI32.DLL', + 750: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSVCRT.DLL', + 854: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SECHOST.DLL', + 960: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\RPCRT4.DLL', + 1064: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IPHLPAPI.DLL', + 1172: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NSI.DLL', + 1270: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WINNSI.DLL', + 1374: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USER32.DLL', + 1478: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\GDI32.DLL', + 1580: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LPK.DLL', + 1678: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USP10.DLL', + 1780: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WS2_32.DLL', + 1884: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\IMM32.DLL', + 1986: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSCTF.DLL', + 2088: (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\EN-US\\' + u'PING.EXE.MUI'), + 2208: (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\GLOBALIZATION\\' + u'SORTING\\SORTDEFAULT.NLS'), + 2348: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSWSOCK.DLL', + 2454: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHQOS.DLL', + 2558: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHTCPIP.DLL', + 2666: u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WSHIP6.DLL'} + + self.assertEquals(string_table, expected_string_table) + + def testStringParsing(self): + """Test parsing the ASCII string.""" + self.assertEquals(binary.ReadUtf16(self._ascii_string_1), 'Whatis') + + self.assertEquals(binary.ReadUtf16(self._ascii_string_2), 'What is this?') + + uni_text = binary.ReadUtf16(self._unicode_string_1) + self.assertEquals(uni_text, u'þrándur') + + def testHex(self): + """Test the hexadecimal representation of data.""" + hex_string_1 = binary.HexifyBuffer(self._ascii_string_1) + hex_compare = ( + '\\x57\\x00\\x68\\x00\\x61\\x00\\x74\\x00\\x00\\x00\\x69\\x00' + '\\x73\\x00') + self.assertEquals(hex_string_1, hex_compare) + + hex_string_2 = binary.HexifyBuffer(self._unicode_string_1) + hex_compare_unicode = ( + '\\xff\\xfe\\xfe\\x00\\x72\\x00\\xe1\\x00\\x6e\\x00\\x64\\x00' + '\\x75\\x00\\x72\\x00') + + self.assertEquals(hex_string_2, hex_compare_unicode) + + +if __name__ == '__main__': + 
unittest.main() diff --git a/plaso/lib/bufferlib.py b/plaso/lib/bufferlib.py new file mode 100644 index 0000000..3a5bada --- /dev/null +++ b/plaso/lib/bufferlib.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains buffer related objects used in plaso.""" + + +class CircularBuffer(object): + """Simple circular buffer for storing EventObjects.""" + + def __init__(self, size): + """Initialize a fixed size circular buffer. + + Args: + size: An integer indicating the number of elements in the buffer. + """ + self._size = size + self._index = 0 + self._list = [] + + def __len__(self): + """Return the length (the fixed size).""" + return self._size + + @property + def size(self): + return self._size + + def GetCurrent(self): + """Return the current item that index points to.""" + index = self._index - 1 + if index < 0: + return + + return self._list[index] + + def Clear(self): + """Clear all elements in the list.""" + self._list = [] + self._index = 0 + + def __iter__(self): + """Return all elements from the list.""" + for index in range(0, self._size): + try: + yield self._list[(self._index + index) % self._size] + except IndexError: + pass + + def Flush(self): + """Return a generator for all items and clear the buffer.""" + for item in self: + yield item + self.Clear() + + def Append(self, item): + """Add an item to the list.""" + if self._index >= self._size: + self._index = self._index % self._size + + try: + self._list[self._index] = item + except IndexError: + self._list.append(item) + self._index += 1 diff --git a/plaso/lib/bufferlib_test.py b/plaso/lib/bufferlib_test.py new file mode 100644 index 0000000..3eb051f --- /dev/null +++ b/plaso/lib/bufferlib_test.py @@ -0,0 +1,60 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
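+# A minimal usage sketch (values illustrative only) of the CircularBuffer
+# class defined in plaso/lib/bufferlib.py above:
+#
+#   circular_buffer = bufferlib.CircularBuffer(3)
+#   for item in (1, 2, 3, 4):
+#     circular_buffer.Append(item)
+#   # The buffer keeps the three most recent items; the oldest item (1) has
+#   # been overwritten, so iteration yields 2, 3, 4.
+#   assert list(circular_buffer) == [2, 3, 4]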
+"""Tests for plaso.lib.buffer""" + +import unittest + +from plaso.lib import bufferlib + + +class TestBuffer(unittest.TestCase): + """Test the circular buffer.""" + + def testBuffer(self): + items = range(1, 11) + + circular_buffer = bufferlib.CircularBuffer(10) + + self.assertEquals(len(circular_buffer), 10) + self.assertEquals(circular_buffer.size, 10) + self.assertTrue(circular_buffer.GetCurrent() is None) + + for item in items: + circular_buffer.Append(item) + self.assertEquals(circular_buffer.GetCurrent(), item) + self.assertEquals(circular_buffer.size, 10) + + content = list(circular_buffer) + self.assertEquals(items, content) + + circular_buffer.Append(11) + self.assertEquals( + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], list(circular_buffer.Flush())) + + self.assertEquals(circular_buffer.GetCurrent(), None) + + new_items = range(1, 51) + for item in new_items: + circular_buffer.Append(item) + self.assertEquals(circular_buffer.GetCurrent(), item) + self.assertEquals(circular_buffer.size, 10) + + self.assertEquals(range(41, 51), list(circular_buffer)) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/lib/errors.py b/plaso/lib/errors.py new file mode 100644 index 0000000..595ba3b --- /dev/null +++ b/plaso/lib/errors.py @@ -0,0 +1,113 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the error classes.""" + +class Error(Exception): + """Base error class.""" + + +class BadConfigOption(Error): + """Raised when the engine is started with a faulty parameter.""" + + +class CollectorError(Error): + """Class that defines collector errors.""" + + +class NotAText(Error): + """Raised when trying to read a text on a non-text sample.""" + + +class NoFormatterFound(Error): + """Raised when no formatter is found for a particular event.""" + + +class PathNotFound(Error): + """Raised when a preprocessor fails to fill in a path variable.""" + + +class PreProcessFail(Error): + """Raised when a preprocess module is unable to gather information.""" + + +class ProxyFailedToStart(Error): + """Raised when unable to start a proxy.""" + + +class QueueEmpty(Error): + """Class that implements a queue empty exception.""" + + +class QueueFull(Error): + """Class that implements a queue full exception.""" + + +class SameFileType(Error): + """Raised when a file is being evaluated against the same driver type.""" + + +class SourceScannerError(Error): + """Class that defines source scanner errors.""" + + +class TimestampNotCorrectlyFormed(Error): + """Raised when there is an error adding a timestamp to an EventObject.""" + + +class UnableToOpenFile(Error): + """Raised when a PlasoFile class attempts to open a file it cannot open.""" + + +class UnableToOpenFilesystem(Error): + """Raised when unable to open filesystem.""" + + +class UnableToParseFile(Error): + """Raised when a parser is not designed to parse a file.""" + + +class UserAbort(Error): + """Class that defines an user initiated abort exception.""" + + +class WrongBencodePlugin(Error): + """Error reporting wrong bencode plugin used.""" + + +class WrongFilterOption(Error): + """Raised when the filter option is badly formed.""" + + +class WrongFormatter(Error): + """Raised when the formatter is not applicable for a particular event.""" + + +class WrongPlistPlugin(Error): + """Error reporting wrong plist plugin used.""" + + +class WrongPlugin(Error): + """Raised when the plugin is of the wrong type.""" + + +class WrongProtobufEntry(Error): + """Raised when an EventObject cannot be serialized as a protobuf.""" + + +class WinRegistryValueError(Error): + """Raised when there is an issue reading a registry value.""" diff --git a/plaso/lib/event.py b/plaso/lib/event.py new file mode 100644 index 0000000..16cc98b --- /dev/null +++ b/plaso/lib/event.py @@ -0,0 +1,478 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The core object definitions, e.g. 
the event object.""" + +import collections +import logging +import uuid + +from plaso.formatters import manager as formatters_manager +from plaso.lib import timelib +from plaso.lib import utils + +import pytz + + +class AnalysisReport(object): + """Class that defines an analysis report.""" + + def __init__(self): + """Initializes the analysis report.""" + super(AnalysisReport, self).__init__() + self._anomalies = [] + self._tags = [] + + def __unicode__(self): + """Return an unicode string representation of the report.""" + return self.GetString() + + def GetAnomalies(self): + """Retrieves the list of anomalies that are attached to the report.""" + return self._anomalies + + def GetString(self): + """Return an unicode string representation of the report.""" + # TODO: Make this a more complete function that includes images + # and the option of saving as a full fledged HTML document. + string_list = [] + string_list.append(u'Report generated from: {0:s}'.format(self.plugin_name)) + + time_compiled = getattr(self, 'time_compiled', 0) + if time_compiled: + time_compiled = timelib.Timestamp.CopyToIsoFormat(time_compiled) + string_list.append(u'Generated on: {0:s}'.format(time_compiled)) + + filter_string = getattr(self, 'filter_string', '') + if filter_string: + string_list.append(u'Filter String: {0:s}'.format(filter_string)) + + string_list.append(u'') + string_list.append(u'Report text:') + string_list.append(self.text) + + return u'\n'.join(string_list) + + def GetTags(self): + """Retrieves the list of event tags that are attached to the report.""" + return self._tags + + # TODO: rename text to body? + def SetText(self, lines_of_text): + """Sets the text based on a list of lines of text. + + Args: + lines_of_text: a list containing lines of text. + """ + # Append one empty string to make sure a new line is added to the last + # line of text as well. + lines_of_text.append(u'') + + self.text = u'\n'.join(lines_of_text) + + +# TODO: Re-design the event object to make it lighter, perhaps template +# based. The current design is too slow and needs to be improved. +class EventObject(object): + """An event object is the main datastore for an event in plaso. + + The framework is designed to parse files and create an event + from every single record, line or key extracted from the file. + + An EventObject is the main data storage for an event in plaso. + + This class defines the high level interface of EventObject. + Before creating an EventObject a class needs to be implemented + that inherits from EventObject and implements the functions in it. + + The EventObject is then used by output processing for saving + in other forms, such as a protobuff, AFF4 container, CSV files, + databases, etc. + + The goal of the EventObject is to provide a easily extensible + data storage of each events internally in the tool. + + The main EventObject only exposes those functions that the + implementations need to implement. The functions that are needed + simply provide information about the event, or describe the + attributes that are necessary. How they are assembled is totally + up to the implementation. + + All required attributes of the EventObject are passed to the + constructor of the object while the optional ones are set + using the method SetValue(attribute, value). + """ + # This is a convenience variable to define event object as + # simple value objects. Its runtime equivalent data_type + # should be used in code logic. 
+ DATA_TYPE = '' + + # This is a reserved variable just used for comparison operation and defines + # attributes that should not be used during evaluation of whether two + # EventObjects are the same. + COMPARE_EXCLUDE = frozenset([ + 'timestamp', 'inode', 'pathspec', 'filename', 'uuid', + 'data_type', 'display_name', 'store_number', 'store_index', 'tag']) + + def __init__(self): + """Initializes the event object.""" + self.uuid = uuid.uuid4().get_hex() + if self.DATA_TYPE: + self.data_type = self.DATA_TYPE + + def EqualityString(self): + """Return a string describing the EventObject in terms of object equality. + + The details of this function must match the logic of __eq__. EqualityStrings + of two event objects should be the same if and only if the EventObjects are + equal as described in __eq__. + + Returns: + String: will match another EventObject's Equality String if and only if + the EventObjects are equal + """ + fields = sorted(list(self.GetAttributes().difference(self.COMPARE_EXCLUDE))) + + # TODO: Review this (after 1.1.0 release). Is there a better/more clean + # method of removing the timestamp description field out of the fields list? + parser = getattr(self, 'parser', u'') + if parser == u'filestat': + # We don't want to compare the timestamp description field when comparing + # filestat events. This is done to be able to join together FILE events + # that have the same timestamp, yet different description field (as in an + # event that has for instance the same timestamp for mtime and atime, + # joining it together into a single event). + try: + timestamp_desc_index = fields.index('timestamp_desc') + del fields[timestamp_desc_index] + except ValueError: + pass + + basic = [self.timestamp, self.data_type] + attributes = [] + for attribute in fields: + value = getattr(self, attribute) + if type(value) is dict: + attributes.append(sorted(value.items())) + elif type(value) is set: + attributes.append(sorted(list(value))) + else: + attributes.append(value) + identity = basic + [x for pair in zip(fields, attributes) for x in pair] + + if parser == 'filestat': + inode = getattr(self, 'inode', 'a') + if inode == 'a': + inode = '_' + str(uuid.uuid4()) + identity.append('inode') + identity.append(inode) + + return u'|'.join(map(unicode, identity)) + + def __eq__(self, event_object): + """Return a boolean indicating if two EventObject are considered equal. + + Compares two EventObject objects together and evaluates if they are + the same or close enough to be considered to represent the same event. + + For two EventObject objects to be considered the same they need to + have the following conditions: + + Have the same timestamp. + + Have the same data_type value. + + Have the same set of attributes. + + Compare all other attributes than those that are reserved, and + they all have to match. + + The following attributes are considered to be 'reserved' and not used + for the comparison, so they may be different yet the EventObject is still + considered to be equal: + + inode + + pathspec + + filename + + display_name + + store_number + + store_index + + Args: + event_object: The EventObject that is being compared to this one. + + Returns: + True: if both EventObjects are considered equal, otherwise False. 
+ """ + + # Note: if this method changes, the above EqualityString method MUST be + # updated as well + if not isinstance(event_object, EventObject): + return False + + if self.timestamp != event_object.timestamp: + return False + + if self.data_type != event_object.data_type: + return False + + attributes = self.GetAttributes() + if attributes != event_object.GetAttributes(): + return False + + # Here we have to deal with "near" duplicates, so not all attributes + # should be compared. + for attribute in attributes.difference(self.COMPARE_EXCLUDE): + if getattr(self, attribute) != getattr(event_object, attribute): + return False + + # If we are dealing with the stat parser the inode number is the one + # attribute that really matters, unlike others. + if 'filestat' in getattr(self, 'parser', ''): + return utils.GetUnicodeString(getattr( + self, 'inode', 'a')) == utils.GetUnicodeString(getattr( + event_object, 'inode', 'b')) + + return True + + def GetAttributes(self): + """Return a list of all defined attributes.""" + return set(self.__dict__.keys()) + + def GetValues(self): + """Returns a dictionary of all defined attributes and their values.""" + values = {} + for attribute_name in self.GetAttributes(): + values[attribute_name] = getattr(self, attribute_name) + return values + + def GetString(self): + """Return a unicode string representation of an EventObject.""" + return unicode(self) + + def __str__(self): + """Return a string object of the EventObject.""" + return unicode(self).encode('utf-8') + + def __unicode__(self): + """Print a human readable string from the EventObject.""" + out_write = [] + + out_write.append(u'+-' * 40) + out_write.append(u'[Timestamp]:\n {0:s}'.format( + timelib.Timestamp.CopyToIsoFormat(self.timestamp))) + out_write.append(u'\n[Message Strings]:') + + # TODO: move formatting testing to a formatters (manager) test. + event_formatter = formatters_manager.EventFormatterManager.GetFormatter( + self) + if not event_formatter: + out_write.append(u'None') + else: + msg, msg_short = event_formatter.GetMessages(self) + source_short, source_long = event_formatter.GetSources(self) + out_write.append(u'{2:>7}: {0}\n{3:>7}: {1}\n'.format( + utils.GetUnicodeString(msg_short), utils.GetUnicodeString(msg), + 'Short', 'Long')) + out_write.append(u'{2:>7}: {0}\n{3:>7}: {1}\n'.format( + utils.GetUnicodeString(source_short), + utils.GetUnicodeString(source_long), 'Source Short', 'Source Long')) + + if hasattr(self, 'pathspec'): + pathspec_string = self.pathspec.comparable + out_write.append(u'[Pathspec]:\n {0:s}\n'.format( + pathspec_string.replace('\n', '\n '))) + + out_additional = [] + out_write.append(u'[Reserved attributes]:') + out_additional.append(u'[Additional attributes]:') + + for attr_key, attr_value in sorted(self.GetValues().items()): + if attr_key in utils.RESERVED_VARIABLES: + if attr_key == 'pathspec': + continue + else: + out_write.append( + u' {{{key}}} {value}'.format(key=attr_key, value=attr_value)) + else: + out_additional.append( + u' {{{key}}} {value}'.format(key=attr_key, value=attr_value)) + + out_write.append(u'\n') + out_additional.append(u'') + + part_1 = u'\n'.join(out_write) + part_2 = u'\n'.join(out_additional) + return part_1 + part_2 + + +class EventTag(object): + """A native Python object for the EventTagging protobuf. + + The EventTag object should have the following attributes: + (optional attributes surrounded with brackets) + + store_number: An integer, pointing to the store the EventObject is. 
+ + store_index: An index into the store where the EventObject is. + + event_uuid: An UUID value of the event this tag belongs to. + + [comment]: An arbitrary string containing comments about the event. + + [color]: A string containing color information. + + [tags]: A list of strings with tags, eg: 'Malware', 'Entry Point'. + + The tag either needs to have an event_uuid defined or both the store_number + and store_index to be valid (not both, if both defined the store_number and + store_index will be used). + """ + + # TODO: Enable __slots__ once we tested the first round of changes. + @property + def string_key(self): + """Return a string index key for this tag.""" + if not self.IsValidForSerialization(): + return '' + + uuid_string = getattr(self, 'event_uuid', None) + if uuid_string: + return uuid_string + + return u'{}:{}'.format(self.store_number, self.store_index) + + def GetString(self): + """Retrieves a string representation of the event.""" + ret = [] + ret.append(u'-' * 50) + if getattr(self, 'store_number', 0): + ret.append(u'{0:>7}:\n\tNumber: {1}\n\tIndex: {2}'.format( + 'Store', self.store_number, self.store_index)) + else: + ret.append(u'{0:>7}:\n\tUUID: {1}'.format('Store', self.event_uuid)) + if hasattr(self, 'comment'): + ret.append(u'{:>7}: {}'.format('Comment', self.comment)) + if hasattr(self, 'color'): + ret.append(u'{:>7}: {}'.format('Color', self.color)) + if hasattr(self, 'tags'): + ret.append(u'{:>7}: {}'.format('Tags', u','.join(self.tags))) + + return u'\n'.join(ret) + + def IsValidForSerialization(self): + """Return whether or not this is a valid tag object.""" + if getattr(self, 'event_uuid', None): + return True + + if getattr(self, 'store_number', 0) and getattr( + self, 'store_index', -1) >= 0: + return True + + return False + + +class PreprocessObject(object): + """Object used to store all information gained from preprocessing.""" + + def __init__(self): + """Initializes the preprocess object.""" + super(PreprocessObject, self).__init__() + self._user_ids_to_names = None + self.zone = pytz.UTC + + def GetUserMappings(self): + """Returns a dictionary objects mapping SIDs or UIDs to usernames.""" + if self._user_ids_to_names is None: + self._user_ids_to_names = {} + + if self._user_ids_to_names: + return self._user_ids_to_names + + for user in getattr(self, 'users', []): + if 'sid' in user: + user_id = user.get('sid', u'') + elif 'uid' in user: + user_id = user.get('uid', u'') + else: + user_id = u'' + + if user_id: + self._user_ids_to_names[user_id] = user.get('name', user_id) + + return self._user_ids_to_names + + def GetUsernameById(self, user_id): + """Returns a username for a specific user identifier. + + Args: + user_id: The user identifier, either a SID or UID. + + Returns: + If available the user name for the identifier, otherwise the string '-'. + """ + user_ids_to_names = self.GetUserMappings() + + return user_ids_to_names.get(user_id, '-') + + # TODO: change to property with getter and setter. + def SetTimezone(self, timezone_identifier): + """Sets the timezone. + + Args: + timezone_identifier: string containing the identifier of the timezone, + e.g. 'UTC' or 'Iceland'. + """ + try: + self.zone = pytz.timezone(timezone_identifier) + except pytz.UnknownTimeZoneError as exception: + logging.warning( + u'Unable to set timezone: {0:s} with error: {1:s}.'.format( + timezone_identifier, exception)) + + def SetCollectionInformationValues(self, dict_object): + """Sets the collection information values. 
+ + Args: + dict_object: dictionary object containing the collection information + values. + """ + self.collection_information = dict(dict_object) + + if 'configure_zone' in self.collection_information: + self.collection_information['configure_zone'] = pytz.timezone( + self.collection_information['configure_zone']) + + def SetCounterValues(self, dict_object): + """Sets the counter values. + + Args: + dict_object: dictionary object containing the counter values. + """ + self.counter = collections.Counter() + for key, value in dict_object.iteritems(): + self.counter[key] = value + + def SetPluginCounterValues(self, dict_object): + """Sets the plugin counter values. + + Args: + dict_object: dictionary object containing the plugin counter values. + """ + self.plugin_counter = collections.Counter() + for key, value in dict_object.iteritems(): + self.plugin_counter[key] = value + + +# Named tuple that defines a parse error. +# +# Attributes: +# name: The parser or plugin name. +# description: The description of the error. +# path_spec: Optional path specification of the file entry (instance of +# dfvfs.PathSpec). The default is None. +ParseError = collections.namedtuple( + 'ParseError', 'name description path_spec') diff --git a/plaso/lib/event_test.py b/plaso/lib/event_test.py new file mode 100644 index 0000000..16e7c95 --- /dev/null +++ b/plaso/lib/event_test.py @@ -0,0 +1,324 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a unit test for the EventObject. + +This is an implementation of an unit test for EventObject storage mechanism for +plaso. + +The test consists of creating six EventObjects. + +Error handling. The following tests are performed for error handling: + + Access attributes that are not set. +""" + +import unittest + +from plaso.events import text_events +from plaso.events import windows_events +from plaso.lib import event +from plaso.lib import timelib_test + + +class TestEvent1(event.EventObject): + """A test event object.""" + DATA_TYPE = 'test:event1' + + def __init__(self, timestamp, attributes): + """Initializes the test event object.""" + super(TestEvent1, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = 'Some time in the future' + for attribute, value in attributes.iteritems(): + setattr(self, attribute, value) + + +class FailEvent(event.EventObject): + """An test event object without the minimal required initialization.""" + + +def GetEventObjects(): + """Returns a list of test event objects.""" + event_objects = [] + hostname = 'MYHOSTNAME' + data_type = 'test:event1' + + event_a = event.EventObject() + event_a.username = 'joesmith' + event_a.filename = 'c:/Users/joesmith/NTUSER.DAT' + event_a.hostname = hostname + event_a.timestamp = 0 + event_a.data_type = data_type + + # TODO: move this to a WindowRegistrysEvent unit test. 
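+  # As the calls below illustrate, WindowsRegistryEvent is constructed with a
+  # timestamp, the registry key and a dict of value names and value data.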
+ timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-20 22:38:46.929596') + event_b = windows_events.WindowsRegistryEvent( + timestamp, u'MY AutoRun key', {u'Run': u'c:/Temp/evil.exe'}) + event_b.hostname = hostname + event_objects.append(event_b) + + timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-20 23:56:46.929596') + event_c = windows_events.WindowsRegistryEvent( + timestamp, u'//HKCU/Secret/EvilEmpire/Malicious_key', + {u'Value': u'send all the exes to the other world'}) + event_c.hostname = hostname + event_objects.append(event_c) + + timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-20 16:44:46.000000') + event_d = windows_events.WindowsRegistryEvent( + timestamp, u'//HKCU/Windows/Normal', + {u'Value': u'run all the benign stuff'}) + event_d.hostname = hostname + event_objects.append(event_d) + + event_objects.append(event_a) + + timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-30 10:29:47.929596') + filename = 'c:/Temp/evil.exe' + event_e = TestEvent1(timestamp, { + 'text': 'This log line reads ohh so much.'}) + event_e.filename = filename + event_e.hostname = hostname + + event_objects.append(event_e) + + timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-30 10:29:47.929596') + event_f = TestEvent1(timestamp, { + 'text': 'Nothing of interest here, move on.'}) + event_f.filename = filename + event_f.hostname = hostname + + event_objects.append(event_f) + + timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-30 13:06:47.939596') + event_g = TestEvent1(timestamp, { + 'text': 'Mr. Evil just logged into the machine and got root.'}) + event_g.filename = filename + event_g.hostname = hostname + + event_objects.append(event_g) + + text_dict = {'body': ( + u'This is a line by someone not reading the log line properly. And ' + u'since this log line exceeds the accepted 80 chars it will be ' + u'shortened.'), 'hostname': u'nomachine', 'username': u'johndoe'} + + # TODO: move this to a TextEvent unit test. 
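+  # The TextEvent below takes a timestamp, a second positional argument (an
+  # offset into the source text, 12 here) and a dict whose entries become
+  # attributes of the event object. Note: the offset interpretation is an
+  # assumption based on common usage, not stated in this change.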
+ timestamp = timelib_test.CopyStringToTimestamp( + '2012-06-05 22:14:19.000000') + event_h = text_events.TextEvent(timestamp, 12, text_dict) + event_h.text = event_h.body + event_h.hostname = hostname + event_h.filename = filename + + event_objects.append(event_h) + + return event_objects + + +class EventObjectTest(unittest.TestCase): + """Tests for the event object.""" + + def testSameEvent(self): + """Test the EventObject comparison.""" + event_a = event.EventObject() + event_b = event.EventObject() + event_c = event.EventObject() + event_d = event.EventObject() + event_e = event.EventObject() + + event_a.timestamp = 123 + event_a.timestamp_desc = u'LAST WRITTEN' + event_a.data_type = 'mock:nothing' + event_a.inode = 124 + event_a.filename = u'c:/bull/skrytinmappa/skra.txt' + event_a.another_attribute = False + event_a.metadata = { + u'author': u'Some Random Dude', + u'version': 1245L, + u'last_changed': u'Long time ago'} + event_a.strings = [ + u'This ', u'is a ', u'long string'] + + event_b.timestamp = 123 + event_b.timestamp_desc = 'LAST WRITTEN' + event_b.data_type = 'mock:nothing' + event_b.inode = 124 + event_b.filename = 'c:/bull/skrytinmappa/skra.txt' + event_b.another_attribute = False + event_b.metadata = { + 'author': 'Some Random Dude', + 'version': 1245L, + 'last_changed': 'Long time ago'} + event_b.strings = [ + 'This ', 'is a ', 'long string'] + + event_c.timestamp = 123 + event_c.timestamp_desc = 'LAST UPDATED' + event_c.data_type = 'mock:nothing' + event_c.inode = 124 + event_c.filename = 'c:/bull/skrytinmappa/skra.txt' + event_c.another_attribute = False + + event_d.timestamp = 14523 + event_d.timestamp_desc = 'LAST WRITTEN' + event_d.data_type = 'mock:nothing' + event_d.inode = 124 + event_d.filename = 'c:/bull/skrytinmappa/skra.txt' + event_d.another_attribute = False + + event_e.timestamp = 123 + event_e.timestamp_desc = 'LAST WRITTEN' + event_e.data_type = 'mock:nothing' + event_e.inode = 623423 + event_e.filename = 'c:/afrit/onnurskra.txt' + event_e.another_attribute = False + event_e.metadata = { + 'author': 'Some Random Dude', + 'version': 1245, + 'last_changed': 'Long time ago'} + event_e.strings = [ + 'This ', 'is a ', 'long string'] + + self.assertEquals(event_a, event_b) + self.assertNotEquals(event_a, event_c) + self.assertEquals(event_a, event_e) + self.assertNotEquals(event_c, event_d) + + def testEqualityString(self): + """Test the EventObject EqualityString.""" + event_a = event.EventObject() + event_b = event.EventObject() + event_c = event.EventObject() + event_d = event.EventObject() + event_e = event.EventObject() + event_f = event.EventObject() + + event_a.timestamp = 123 + event_a.timestamp_desc = 'LAST WRITTEN' + event_a.data_type = 'mock:nothing' + event_a.inode = 124 + event_a.filename = 'c:/bull/skrytinmappa/skra.txt' + event_a.another_attribute = False + + event_b.timestamp = 123 + event_b.timestamp_desc = 'LAST WRITTEN' + event_b.data_type = 'mock:nothing' + event_b.inode = 124 + event_b.filename = 'c:/bull/skrytinmappa/skra.txt' + event_b.another_attribute = False + + event_c.timestamp = 123 + event_c.timestamp_desc = 'LAST UPDATED' + event_c.data_type = 'mock:nothing' + event_c.inode = 124 + event_c.filename = 'c:/bull/skrytinmappa/skra.txt' + event_c.another_attribute = False + + event_d.timestamp = 14523 + event_d.timestamp_desc = 'LAST WRITTEN' + event_d.data_type = 'mock:nothing' + event_d.inode = 124 + event_d.filename = 'c:/bull/skrytinmappa/skra.txt' + event_d.another_attribute = False + + event_e.timestamp = 123 + 
event_e.timestamp_desc = 'LAST WRITTEN' + event_e.data_type = 'mock:nothing' + event_e.inode = 623423 + event_e.filename = 'c:/afrit/öñṅûŗ₅ḱŖūα.txt' + event_e.another_attribute = False + + event_f.timestamp = 14523 + event_f.timestamp_desc = 'LAST WRITTEN' + event_f.data_type = 'mock:nothing' + event_f.inode = 124 + event_f.filename = 'c:/bull/skrytinmappa/skra.txt' + event_f.another_attribute = False + event_f.weirdness = 'I am a potato' + + self.assertEquals(event_a.EqualityString(), event_b.EqualityString()) + self.assertNotEquals(event_a.EqualityString(), event_c.EqualityString()) + self.assertEquals(event_a.EqualityString(), event_e.EqualityString()) + self.assertNotEquals(event_c.EqualityString(), event_d.EqualityString()) + self.assertNotEquals(event_d.EqualityString(), event_f.EqualityString()) + + def testEqualityFileStatParserMissingInode(self): + """Test that FileStatParser files with missing inodes are distinct""" + event_a = event.EventObject() + event_b = event.EventObject() + + event_a.timestamp = 123 + event_a.timestamp_desc = 'LAST WRITTEN' + event_a.data_type = 'mock:nothing' + event_a.parser = 'filestat' + event_a.filename = 'c:/bull/skrytinmappa/skra.txt' + event_a.another_attribute = False + + event_b.timestamp = 123 + event_b.timestamp_desc = 'LAST WRITTEN' + event_b.data_type = 'mock:nothing' + event_b.parser = 'filestat' + event_b.filename = 'c:/bull/skrytinmappa/skra.txt' + event_b.another_attribute = False + + self.assertNotEquals(event_a, event_b) + + def testEqualityStringFileStatParserMissingInode(self): + """Test that FileStatParser files with missing inodes are distinct""" + event_a = event.EventObject() + event_b = event.EventObject() + + event_a.timestamp = 123 + event_a.timestamp_desc = 'LAST WRITTEN' + event_a.data_type = 'mock:nothing' + event_a.parser = 'filestat' + event_a.filename = 'c:/bull/skrytinmappa/skra.txt' + event_a.another_attribute = False + + event_b.timestamp = 123 + event_b.timestamp_desc = 'LAST WRITTEN' + event_b.data_type = 'mock:nothing' + event_b.parser = 'filestat' + event_b.filename = 'c:/bull/skrytinmappa/skra.txt' + event_b.another_attribute = False + + self.assertNotEquals(event_a.EqualityString(), event_b.EqualityString()) + + def testNotInEventAndNoParent(self): + """Call to an attribute that does not exist.""" + event_object = TestEvent1(0, {}) + + with self.assertRaises(AttributeError): + getattr(event_object, 'doesnotexist') + + def testFailEvent(self): + """Calls to format_string_short that has not been defined.""" + e = FailEvent() + + with self.assertRaises(AttributeError): + getattr(e, 'format_string_short') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/lib/eventdata.py b/plaso/lib/eventdata.py new file mode 100644 index 0000000..9a45685 --- /dev/null +++ b/plaso/lib/eventdata.py @@ -0,0 +1,65 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
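+# A brief illustrative sketch (names assumed, not part of this change) of how
+# a parser would use these constants when producing an event:
+#
+#   from plaso.lib import eventdata
+#
+#   event_object.timestamp_desc = eventdata.EventTimestamp.MODIFICATION_TIME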
+"""A place to store information about events, such as format strings, etc.""" + + +# TODO: move this class to events/definitions.py or equiv. +class EventTimestamp(object): + """Class to manage event data.""" + # The timestamp_desc values. + ACCESS_TIME = u'Last Access Time' + CHANGE_TIME = u'Metadata Modification Time' + CREATION_TIME = u'Creation Time' + MODIFICATION_TIME = u'Content Modification Time' + ENTRY_MODIFICATION_TIME = u'Metadata Modification Time' + # Added time and Creation time are considered the same. + ADDED_TIME = u'Creation Time' + # Written time and Modification time are considered the same. + WRITTEN_TIME = u'Content Modification Time' + EXIT_TIME = u'Exit Time' + LAST_RUNTIME = u'Last Time Executed' + DELETED_TIME = u'Content Deletion Time' + + FILE_DOWNLOADED = u'File Downloaded' + PAGE_VISITED = u'Page Visited' + # TODO: change page visited into last visited time. + LAST_VISITED_TIME = u'Last Visited Time' + + LAST_CHECKED_TIME = u'Last Checked Time' + + EXPIRATION_TIME = u'Expiration Time' + START_TIME = u'Start Time' + END_TIME = u'End Time' + + LAST_SHUTDOWN = u'Last Shutdown Time' + + ACCOUNT_CREATED = u'Account Created' + LAST_LOGIN_TIME = u'Last Login Time' + LAST_PASSWORD_RESET = u'Last Password Reset' + + FIRST_CONNECTED = u'First Connection Time' + LAST_CONNECTED = u'Last Connection Time' + + LAST_PRINTED = u'Last Printed Time' + + LAST_RESUME_TIME = u'Last Resume Time' + + # Note that the unknown time is used for date and time values + # of which the exact meaning is unknown and being researched. + # For most cases do not use this timestamp description. + UNKNOWN = u'Unknown Time' diff --git a/plaso/lib/filter_interface.py b/plaso/lib/filter_interface.py new file mode 100644 index 0000000..7047614 --- /dev/null +++ b/plaso/lib/filter_interface.py @@ -0,0 +1,94 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""A definition of the filter interface for filters in plaso.""" + +import abc + +from plaso.lib import errors +from plaso.lib import registry + + +class FilterObject(object): + """The interface that each filter needs to implement in plaso.""" + + __metaclass__ = registry.MetaclassRegistry + __abstract = True + + @property + def filter_name(self): + """Return the name of the filter.""" + return self.__class__.__name__ + + @property + def last_decision(self): + """Return the last matching decision.""" + return getattr(self, '_decision', None) + + @property + def last_reason(self): + """Return the last reason for the match, if there was one.""" + if getattr(self, 'last_decision', False): + return getattr(self, '_reason', '') + + @property + def fields(self): + """Return a list of fields for adaptive output modules.""" + return [] + + @property + def separator(self): + """Return a separator for adaptive output modules.""" + return ',' + + @property + def limit(self): + """Returns the max number of records to return, or zero for all records.""" + return 0 + + @abc.abstractmethod + def CompileFilter(self, unused_filter_string): + """Verify filter string and prepare the filter for later usage. + + This function verifies the filter string matches the definition of + the class and if necessary compiles or prepares the filter so it can start + matching against passed in EventObjects. + + Args: + unused_filter_string: A string passed in that should be recognized by + the filter class. + + Raises: + errors.WrongPlugin: If this filter string does not match the filter + class. + """ + raise errors.WrongPlugin('Not the correct filter for this string.') + + def Match(self, unused_event_object): + """Compare an EventObject to the filter expression and return a boolean. + + This function returns True if the filter should be passed through the filter + and False otherwise. + + Args: + unused_event_object: An event object (instance of EventObject) that + should be evaluated against the filter. + + Returns: + Boolean indicating whether the filter matches the object or not. + """ + return False diff --git a/plaso/lib/lexer.py b/plaso/lib/lexer.py new file mode 100644 index 0000000..d40f624 --- /dev/null +++ b/plaso/lib/lexer.py @@ -0,0 +1,514 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An LL(1) lexer. This lexer is very tolerant of errors and can resync. + +This lexer is originally copied from the GRR project: +https://code.google.com/p/grr +""" + +import logging +import re + + +class Token(object): + """A token action.""" + + def __init__(self, state_regex, regex, actions, next_state, flags=re.I): + """Initializes the token object. + + Args: + + state_regex: If this regular expression matches the current state this + rule is considered. + regex: A regular expression to try and match from the current point. 
+      actions: A comma separated list of method names in the Lexer to call.
+      next_state: The next state we transition to if this Token matches.
+      flags: re flags.
+    """
+    self.state_regex = re.compile(
+        state_regex, re.DOTALL | re.M | re.S | re.U | flags)
+    self.regex = re.compile(regex, re.DOTALL | re.M | re.S | re.U | flags)
+    self.re_str = regex
+    self.actions = []
+    if actions:
+      self.actions = actions.split(',')
+
+    self.next_state = next_state
+
+  def Action(self, lexer):
+    """Method that is called when the token matches."""
+
+
+class Error(Exception):
+  """Module exception."""
+
+
+class ParseError(Error):
+  """A parse error occurred."""
+
+
+class Lexer(object):
+  """A generic feed lexer."""
+  _CONTINUE_STATE = 'CONTINUE'
+  _INITIAL_STATE = 'INITIAL'
+
+  _ERROR_TOKEN = 'Error'
+
+  # A list of Token() instances.
+  tokens = []
+
+  def __init__(self, data=''):
+    """Initializes the lexer object."""
+    super(Lexer, self).__init__()
+    self.buffer = data
+    self.error = 0
+    self.flags = 0
+    self.processed = 0
+    self.processed_buffer = ''
+    self.state = self._INITIAL_STATE
+    self.state_stack = []
+    self.verbose = 0
+
+  def NextToken(self):
+    """Fetch the next token by trying to match any of the regexes in order."""
+    current_state = self.state
+    for token in self.tokens:
+      # Does the rule apply to our current state?
+      if not token.state_regex.match(current_state):
+        continue
+
+      # Try to match the rule.
+      m = token.regex.match(self.buffer)
+      if not m:
+        continue
+
+      # The match consumes the data off the buffer (the handler can put it
+      # back if it likes).
+      # TODO: using joins might be more efficient here.
+      self.processed_buffer += self.buffer[:m.end()]
+      self.buffer = self.buffer[m.end():]
+      self.processed += m.end()
+
+      next_state = token.next_state
+      for action in token.actions:
+
+        # Is there a callback to handle this action?
+        callback = getattr(self, action, self.Default)
+
+        # Allow a callback to skip other callbacks.
+        try:
+          possible_next_state = callback(string=m.group(0), match=m)
+          if possible_next_state == self._CONTINUE_STATE:
+            continue
+          # Override the state from the Token.
+          elif possible_next_state:
+            next_state = possible_next_state
+        except ParseError as exception:
+          self.Error(exception)
+
+      # Update the next state.
+      if next_state:
+        self.state = next_state
+
+      return token
+
+    # No token matched the remaining input - record the error, then skip one
+    # character ahead so the lexer has a chance to resync on later data.
+    self.Error(u'Expected {0:s}'.format(self.state))
+    self.processed_buffer += self.buffer[:1]
+    self.buffer = self.buffer[1:]
+    return self._ERROR_TOKEN
+
+  def Feed(self, data):
+    """Feed the buffer with data."""
+    self.buffer = ''.join([self.buffer, data])
+
+  def Empty(self):
+    """Return a boolean indicating if the buffer is empty."""
+    return not self.buffer
+
+  def Default(self, **kwarg):
+    """The default callback handler."""
+    logging.debug(u'Default handler: {0:s}'.format(kwarg))
+
+  def Error(self, message=None, weight=1):
+    """Log an error."""
+    logging.debug(u'Error({0:d}): {1:s}'.format(weight, message))
+    # Keep a count of errors.
+    self.error += weight
+
+  def PushState(self, **_):
+    """Push the current state on the state stack."""
+    logging.debug(u'Storing state {0:s}'.format(repr(self.state)))
+    self.state_stack.append(self.state)
+
+  def PopState(self, **_):
+    """Pop the previous state from the stack."""
+    try:
+      self.state = self.state_stack.pop()
+      logging.debug(u'Returned state to {0:s}'.format(self.state))
+
+      return self.state
+    except IndexError:
+      self.Error(
+          u'Tried to pop the state but failed - possible recursion error')
+
+  def PushBack(self, string='', **_):
+    """Push the match back on the stream."""
+    self.buffer = string + self.buffer
+    self.processed_buffer = self.processed_buffer[:-len(string)]
+
+  def Close(self):
+    """A convenience function to force us to parse all the data."""
+    while self.NextToken():
+      if not self.buffer:
+        return
+
+
+class SelfFeederMixIn(Lexer):
+  """This mixin is used to make a lexer which feeds itself.
+
+  Note that self.file_object must be the file object we read from.
+  """
+
+  # TODO: fix this, file object either needs to be set or not passed here.
+  def __init__(self, file_object=None):
+    """Initializes the lexer mixin object.
+
+    Args:
+      file_object: Optional file-like object. The default is None.
+    """
+    super(SelfFeederMixIn, self).__init__()
+    self.file_object = file_object
+
+  def NextToken(self):
+    """Return the next token."""
+    # If we don't have enough data - feed ourselves: We assume
+    # that we must have at least one sector in our buffer.
+    if len(self.buffer) < 512:
+      if self.Feed() == 0 and not self.buffer:
+        return None
+
+    return Lexer.NextToken(self)
+
+  def Feed(self, size=512):
+    """Feed data into the buffer."""
+    data = self.file_object.read(size)
+    Lexer.Feed(self, data)
+    return len(data)
+
+
+class Expression(object):
+  """A class representing an expression."""
+  attribute = None
+  args = None
+  operator = None
+
+  # The expected number of args.
+  number_of_args = 1
+
+  def __init__(self):
+    """Initializes the expression object."""
+    self.args = []
+
+  def SetAttribute(self, attribute):
+    """Set the attribute."""
+    self.attribute = attribute
+
+  def SetOperator(self, operator):
+    """Set the operator."""
+    self.operator = operator
+
+  def AddArg(self, arg):
+    """Adds a new arg to this expression.
+
+    Args:
+      arg: The argument to add (string).
+
+    Returns:
+      True if this arg is the last arg, False otherwise.
+
+    Raises:
+      ParseError: If there are too many args.
+ """ + self.args.append(arg) + if len(self.args) > self.number_of_args: + raise ParseError(u'Too many args for this expression.') + + elif len(self.args) == self.number_of_args: + return True + + return False + + def __str__(self): + """Return a string representation of the expression.""" + return 'Expression: ({0:s}) ({1:s}) {2:s}'.format( + self.attribute, self.operator, self.args) + + # TODO: rename this function to GetTreeAsString or equivalent. + def PrintTree(self, depth=''): + """Print the tree.""" + return u'{0:s} {1:s}'.format(depth, self) + + def Compile(self, unused_filter_implemention): + """Given a filter implementation, compile this expression.""" + raise NotImplementedError( + u'{0:s} does not implement Compile.'.format(self.__class__.__name__)) + + +class BinaryExpression(Expression): + """An expression which takes two other expressions.""" + + def __init__(self, operator='', part=None): + """Initializes the expression object.""" + self.operator = operator + self.args = [] + if part: + self.args.append(part) + super(BinaryExpression, self).__init__() + + def __str__(self): + """Return a string representation of the binary expression.""" + return 'Binary Expression: {0:s} {1:s}'.format( + self.operator, [str(x) for x in self.args]) + + def AddOperands(self, lhs, rhs): + """Add an operant.""" + if isinstance(lhs, Expression) and isinstance(rhs, Expression): + self.args = [lhs, rhs] + else: + raise ParseError(u'Expected expression, got {0:s} {1:s} {2:s}'.format( + lhs, self.operator, rhs)) + + # TODO: rename this function to GetTreeAsString or equivalent. + def PrintTree(self, depth=''): + """Print the tree.""" + result = u'{0:s}{1:s}\n'.format(depth, self.operator) + for part in self.args: + result += u'{0:s}-{1:s}\n'.format(depth, part.PrintTree(depth + ' ')) + + return result + + def Compile(self, filter_implemention): + """Compile the binary expression into a filter object.""" + operator = self.operator.lower() + if operator == 'and' or operator == '&&': + method = 'AndFilter' + elif operator == 'or' or operator == '||': + method = 'OrFilter' + else: + raise ParseError(u'Invalid binary operator {0:s}'.format(operator)) + + args = [x.Compile(filter_implemention) for x in self.args] + return getattr(filter_implemention, method)(*args) + + +class IdentityExpression(Expression): + """An Expression which always evaluates to True.""" + + def Compile(self, filter_implemention): + """Compile the expression.""" + return filter_implemention.IdentityFilter() + + +class SearchParser(Lexer): + """This parser can parse the mini query language and build an AST. + + Examples of valid syntax: + filename contains "foo" and (size > 100k or date before "2011-10") + date between 2011 and 2010 + files older than 1 year + """ + + expression_cls = Expression + binary_expression_cls = BinaryExpression + + tokens = [ + # Double quoted string + Token('STRING', '"', 'PopState,StringFinish', None), + Token('STRING', r'\\(.)', 'StringEscape', None), + Token('STRING', r'[^\\"]+', 'StringInsert', None), + + # Single quoted string + Token('SQ_STRING', '\'', 'PopState,StringFinish', None), + Token('SQ_STRING', r'\\(.)', 'StringEscape', None), + Token('SQ_STRING', r'[^\\\']+', 'StringInsert', None), + + # TODO: Implement a unary not operator. 
+ # The first thing we see in the initial state takes up to the ATTRIBUTE + Token('INITIAL', r'(and|or|\&\&|\|\|)', 'BinaryOperator', None), + Token('INITIAL', r'[^\s\(\)]', 'PushState,PushBack', 'ATTRIBUTE'), + Token('INITIAL', r'\(', 'BracketOpen', None), + Token('INITIAL', r'\)', 'BracketClose', None), + + Token('ATTRIBUTE', r'[\w._0-9]+', 'StoreAttribute', 'OPERATOR'), + Token('OPERATOR', r'[a-z0-9<>=\-\+\!\^\&%]+', 'StoreOperator', + 'ARG_LIST'), + Token('OPERATOR', r'(!=|[<>=])', 'StoreSpecialOperator', 'ARG_LIST'), + Token('ARG_LIST', r'[^\s\'"]+', 'InsertArg', None), + + # Start a string. + Token('.', '"', 'PushState,StringStart', 'STRING'), + Token('.', '\'', 'PushState,StringStart', 'SQ_STRING'), + + # Skip whitespace. + Token('.', r'\s+', None, None), + ] + + def __init__(self, data): + """Initializes the search parser object.""" + # Holds expression + self.current_expression = self.expression_cls() + self.filter_string = data + + # The token stack + self.stack = [] + Lexer.__init__(self, data) + + def BinaryOperator(self, string=None, **_): + """Set the binary operator.""" + self.stack.append(self.binary_expression_cls(string)) + + def BracketOpen(self, **_): + """Define an open bracket.""" + self.stack.append('(') + + def BracketClose(self, **_): + """Close the bracket.""" + self.stack.append(')') + + def StringStart(self, **_): + """Initialize the string.""" + self.string = '' + + def StringEscape(self, string, match, **_): + """Escape backslashes found inside a string quote. + + Backslashes followed by anything other than ['"rnbt] will just be included + in the string. + + Args: + string: The string that matched. + match: The match object (m.group(1) is the escaped code) + """ + if match.group(1) in '\'"rnbt': + self.string += string.decode('string_escape') + else: + self.string += string + + def StringInsert(self, string='', **_): + """Add to the string.""" + self.string += string + + def StringFinish(self, **_): + """Finish the string operation.""" + if self.state == 'ATTRIBUTE': + return self.StoreAttribute(string=self.string) + + elif self.state == 'ARG_LIST': + return self.InsertArg(string=self.string) + + def StoreAttribute(self, string='', **_): + """Store the attribute.""" + logging.debug(u'Storing attribute {0:s}'.format(repr(string))) + + # TODO: Update the expected number_of_args + try: + self.current_expression.SetAttribute(string) + except AttributeError: + raise ParseError(u'Invalid attribute \'{0:s}\''.format(string)) + + return 'OPERATOR' + + def StoreOperator(self, string='', **_): + """Store the operator.""" + logging.debug(u'Storing operator {0:s}'.format(repr(string))) + self.current_expression.SetOperator(string) + + def InsertArg(self, string='', **_): + """Insert an arg to the current expression.""" + logging.debug(u'Storing Argument {0:s}'.format(string)) + + # This expression is complete + if self.current_expression.AddArg(string): + self.stack.append(self.current_expression) + self.current_expression = self.expression_cls() + return self.PopState() + + def _CombineBinaryExpressions(self, operator): + """Combine binary expressions.""" + for i in range(1, len(self.stack)-1): + item = self.stack[i] + if (isinstance(item, BinaryExpression) and item.operator == operator and + isinstance(self.stack[i-1], Expression) and + isinstance(self.stack[i+1], Expression)): + lhs = self.stack[i-1] + rhs = self.stack[i+1] + + self.stack[i].AddOperands(lhs, rhs) + self.stack[i-1] = None + self.stack[i+1] = None + + self.stack = filter(None, self.stack) + + def 
_CombineParenthesis(self): + """Combine parenthesis.""" + for i in range(len(self.stack)-2): + if (self.stack[i] == '(' and self.stack[i+2] == ')' and + isinstance(self.stack[i+1], Expression)): + self.stack[i] = None + self.stack[i+2] = None + + self.stack = filter(None, self.stack) + + def Reduce(self): + """Reduce the token stack into an AST.""" + # Check for sanity + if self.state != 'INITIAL': + self.Error(u'Premature end of expression') + + length = len(self.stack) + while length > 1: + # Precendence order + self._CombineParenthesis() + self._CombineBinaryExpressions('and') + self._CombineBinaryExpressions('or') + + # No change + if len(self.stack) == length: + break + length = len(self.stack) + + if length != 1: + self.Error(u'Illegal query expression') + + return self.stack[0] + + def Error(self, message=None, unused_weight=1): + """Raise an error message.""" + raise ParseError(u'{0:s} in position {1:s}: {2:s} <----> {3:s} )'.format( + message, len(self.processed_buffer), self.processed_buffer, + self.buffer)) + + def Parse(self): + """Parse.""" + if not self.filter_string: + return IdentityExpression() + + self.Close() + return self.Reduce() diff --git a/plaso/lib/limit.py b/plaso/lib/limit.py new file mode 100644 index 0000000..68b6485 --- /dev/null +++ b/plaso/lib/limit.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains few class variables that define various limits.""" + + +MAX_INT64 = 2**64-1 diff --git a/plaso/lib/objectfilter.py b/plaso/lib/objectfilter.py new file mode 100644 index 0000000..b1fdc14 --- /dev/null +++ b/plaso/lib/objectfilter.py @@ -0,0 +1,925 @@ +#!/usr/bin/env python +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Originally copied from the GRR project: +# http://code.google.com/p/grr/source/browse/lib/objectfilter.py +# Copied on 11/15/2012 +# Minor changes made to make it work in plaso. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Classes to perform filtering of objects based on their data members. + +Given a list of objects and a textual filter expression, these classes allow +you to determine which objects match the filter. The system has two main +pieces: A parser for the supported grammar and a filter implementation. 
+
+Given any complying user-supplied grammar, it is parsed with a custom lexer
+based on GRR's lexer and then compiled into an actual implementation by using
+the filter implementation. A filter implementation simply provides actual
+implementations for the primitives required to perform filtering. The compiled
+result is always a class supporting the Filter interface.
+
+If we define a class called Car such as:
+
+
+class Car(object):
+  def __init__(self, code, color="white", doors=3):
+    self.code = code
+    self.color = color
+    self.doors = doors
+
+And we have two instances:
+
+  ford_ka = Car("FORDKA1", color="grey")
+  toyota_corolla = Car("COROLLA1", color="white", doors=5)
+  fleet = [ford_ka, toyota_corolla]
+
+We want to find cars that are grey and have 3 or more doors. We could filter
+our fleet like this:
+
+  criteria = "(color is grey) and (doors >= 3)"
+  parser = ContextFilterParser(criteria).Parse()
+  compiled_filter = parser.Compile(LowercaseAttributeFilterImp)
+
+  for car in fleet:
+    if compiled_filter.Matches(car):
+      print "Car %s matches the supplied filter." % car.code
+
+The filter expression contains two subexpressions joined by an AND operator:
+  "color is grey" and "doors >= 3"
+This means we want to search for objects matching these two subexpressions.
+Let's analyze the first one in depth, "color is grey":
+
+  "color": the left operand specifies a search path to look for the data. This
+  tells our filtering system to look for the color property on passed objects.
+  "is": the operator. Values retrieved for the "color" property will be checked
+  against the right operand to see if they are equal.
+  "grey": the right operand. It specifies an explicit value to check for.
+
+So each time an object is passed through the filter, it will expand the value
+of the color data member, and compare its value against "grey".
+
+Because data members of objects are often not simple datatypes but other
+objects, the system allows you to reference data members within other data
+members by separating each by a dot. Let's see an example:
+
+Let's add a more complex Car class with default tyre data:
+
+
+class CarWithTyres(Car):
+  def __init__(self, code, tyres=None, color="white", doors=3):
+    super(CarWithTyres, self).__init__(code, color, doors)
+    self.tyres = tyres or Tyre("Pirelli", "PZERO")
+
+
+class Tyre(object):
+  def __init__(self, brand, code):
+    self.brand = brand
+    self.code = code
+
+And two new instances:
+
+  ford_ka = CarWithTyres("FORDKA", color="grey", tyres=Tyre("AVON", "ZT5"))
+  toyota_corolla = Car("COROLLA1", color="white", doors=5)
+  fleet = [ford_ka, toyota_corolla]
+
+To filter a car based on the tyre brand, we would use a search path of
+"tyres.brand".
+
+Because the filter implementation provides the actual classes that perform
+handling of the search paths, operators, etc., customizing the behaviour of
+the filter is easy. Three basic filter implementations are given:
+
+  BaseFilterImplementation: search path expansion is done on attribute names
+  as provided (case-sensitive).
+  LowercaseAttributeFilterImp: search path expansion is done on the lowercased
+  attribute name, so that it only accesses attributes, not methods.
+  DictFilterImplementation: search path expansion is done on dictionary access
+  to the given object.
So "a.b" expands the object obj to obj["a"]["b"] +""" + +import abc +import binascii +import logging +import re + +from plaso.lib import lexer +from plaso.lib import utils + + +class Error(Exception): + """Base module exception.""" + + +class MalformedQueryError(Error): + """The provided filter query is malformed.""" + + +class ParseError(Error): + """The parser for textual queries returned invalid results.""" + + +class InvalidNumberOfOperands(Error): + """The number of operands provided to this operator is wrong.""" + + +class Filter(object): + """Base class for every filter.""" + + def __init__(self, arguments=None, value_expander=None): + """Constructor. + + Args: + arguments: Arguments to the filter. + value_expander: A callable that will be used to expand values for the + objects passed to this filter. Implementations expanders are provided by + subclassing ValueExpander. + + Raises: + Error: If the given value_expander is not a subclass of ValueExpander + """ + self.value_expander = None + self.value_expander_cls = value_expander + if self.value_expander_cls: + if not issubclass(self.value_expander_cls, ValueExpander): + raise Error(u'{0:s} is not a valid value expander'.format( + self.value_expander_cls)) + self.value_expander = self.value_expander_cls() + self.args = arguments or [] + logging.debug(u'Adding {0:s}'.format(arguments)) + + @abc.abstractmethod + def Matches(self, obj): + """Whether object obj matches this filter.""" + + def Filter(self, objects): + """Returns a list of objects that pass the filter.""" + return filter(self.Matches, objects) + + def __str__(self): + return '{0:s}({1:s})'.format( + self.__class__.__name__, ', '.join([str(arg) for arg in self.args])) + + +class AndFilter(Filter): + """Performs a boolean AND of the given Filter instances as arguments. + + Note that if no conditions are passed, all objects will pass. + """ + def Matches(self, obj): + for child_filter in self.args: + if not child_filter.Matches(obj): + return False + return True + + +class OrFilter(Filter): + """Performs a boolean OR of the given Filter instances as arguments. + + Note that if no conditions are passed, all objects will pass. + """ + def Matches(self, obj): + if not self.args: + return True + + for child_filter in self.args: + if child_filter.Matches(obj): + return True + return False + + +# pylint: disable=abstract-method +class Operator(Filter): + """Base class for all operators.""" + + +class IdentityFilter(Operator): + def Matches(self, _): + return True + + +class UnaryOperator(Operator): + """Base class for unary operators.""" + + def __init__(self, operand, **kwargs): + """Constructor.""" + super(UnaryOperator, self).__init__(arguments=[operand], **kwargs) + if len(self.args) != 1: + raise InvalidNumberOfOperands( + u'Only one operand is accepted by {0:s}. Received {1:d}.'.format( + self.__class__.__name__, len(self.args))) + + +class BinaryOperator(Operator): + """Base class for binary operators. + + The left operand is always a path into the object which will be expanded for + values. The right operand is a value defined at initialization and is stored + at self.right_operand. + """ + def __init__(self, arguments=None, **kwargs): + super(BinaryOperator, self).__init__(arguments=arguments, **kwargs) + if len(self.args) != 2: + raise InvalidNumberOfOperands( + u'Only two operands are accepted by {0:s}. 
Received {1:s}.'.format( + self.__class__.__name__, len(self.args))) + + self.left_operand = self.args[0] + self.right_operand = self.args[1] + + +class GenericBinaryOperator(BinaryOperator): + """Allows easy implementations of operators.""" + + def __init__(self, **kwargs): + super(GenericBinaryOperator, self).__init__(**kwargs) + self.bool_value = True + + def FlipBool(self): + logging.debug(u'Negative matching.') + self.bool_value = not self.bool_value + + def Operation(self, x, y): + """Performs the operation between two values.""" + + def Operate(self, values): + """Takes a list of values and if at least one matches, returns True.""" + for val in values: + try: + if self.Operation(val, self.right_operand): + return True + else: + continue + except (ValueError, TypeError): + continue + return False + + def Matches(self, obj): + key = self.left_operand + values = self.value_expander.Expand(obj, key) + if values and self.Operate(values): + return self.bool_value + return not self.bool_value + + +class Equals(GenericBinaryOperator): + """Matches objects when the right operand equals the expanded value.""" + + def Operation(self, x, y): + return x == y + + +class NotEquals(Equals): + """Matches when the right operand isn't equal to the expanded value.""" + + def __init__(self, **kwargs): + super(NotEquals, self).__init__(**kwargs) + self.bool_value = False + + +class Less(GenericBinaryOperator): + """Whether the expanded value >= right_operand.""" + + def Operation(self, x, y): + return x < y + + +class LessEqual(GenericBinaryOperator): + """Whether the expanded value <= right_operand.""" + + def Operation(self, x, y): + return x <= y + + +class Greater(GenericBinaryOperator): + """Whether the expanded value > right_operand.""" + + def Operation(self, x, y): + return x > y + + +class GreaterEqual(GenericBinaryOperator): + """Whether the expanded value >= right_operand.""" + + def Operation(self, x, y): + return x >= y + + +class Contains(GenericBinaryOperator): + """Whether the right operand is contained in the value.""" + + def Operation(self, x, y): + if type(x) in (str, unicode): + return y.lower() in x.lower() + + return y in x + + +class InSet(GenericBinaryOperator): + # TODO(user): Change to an N-ary Operator? + """Whether all values are contained within the right operand.""" + + def Operation(self, x, y): + """Whether x is fully contained in y.""" + if x in y: + return True + + # x might be an iterable + # first we need to skip strings or we'll do silly things + if (isinstance(x, basestring) + or isinstance(x, bytes)): + return False + + try: + for value in x: + if value not in y: + return False + return True + except TypeError: + return False + + +class Regexp(GenericBinaryOperator): + """Whether the value matches the regexp in the right operand.""" + + def __init__(self, *children, **kwargs): + super(Regexp, self).__init__(*children, **kwargs) + # Note that right_operand is not necessarily a string. 
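+    # For example, the test query 'name regexp "(?i)advapi32.dll"' reaches
+    # this constructor with the pattern as the right operand; the pattern is
+    # compiled once here so that Operation() can reuse the compiled regular
+    # expression for every value it is matched against.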
+    logging.debug(u'Compiled: {0!s}'.format(self.right_operand))
+    try:
+      self.compiled_re = re.compile(
+          utils.GetUnicodeString(self.right_operand), re.DOTALL)
+    except re.error:
+      raise ValueError(u'Regular expression "{0!s}" is malformed.'.format(
+          self.right_operand))
+
+  def Operation(self, x, unused_y):
+    try:
+      if self.compiled_re.search(utils.GetUnicodeString(x)):
+        return True
+    except TypeError:
+      pass
+
+    return False
+
+
+class RegexpInsensitive(Regexp):
+  """Whether the value matches the right operand regexp, ignoring case."""
+
+  def __init__(self, *children, **kwargs):
+    super(RegexpInsensitive, self).__init__(*children, **kwargs)
+    # Note that right_operand is not necessarily a string.
+    logging.debug(u'Compiled: {0!s}'.format(self.right_operand))
+    try:
+      self.compiled_re = re.compile(utils.GetUnicodeString(self.right_operand),
+                                    re.I | re.DOTALL)
+    except re.error:
+      raise ValueError(u'Regular expression "{0!s}" is malformed.'.format(
+          self.right_operand))
+
+
+class Context(Operator):
+  """Restricts the child operators to a specific context within the object.
+
+  Solves the context problem. The context problem is the following:
+  Suppose you store a list of loaded DLLs within a process. Suppose that for
+  each of these DLLs you store the number of imported functions and each of
+  the imported function names.
+
+  Imagine that a malicious DLL is injected into processes and its indicators
+  are that it only imports one function and that it is RegQueryValueEx. You'd
+  write your indicator like this:
+
+
+  AndOperator(
+    Equal("ImportedDLLs.ImpFunctions.Name", "RegQueryValueEx"),
+    Equal("ImportedDLLs.NumImpFunctions", "1")
+  )
+
+  Now imagine you have these two processes on a given system.
+
+  Process1
+  +[0]__ImportedDlls
+    +[0]__Name: "notevil.dll"
+    |[0]__ImpFunctions
+    |  +[1]__Name: "CreateFileA"
+    |[0]__NumImpFunctions: 1
+    |
+    +[1]__Name: "alsonotevil.dll"
+    |[1]__ImpFunctions
+    |  +[0]__Name: "RegQueryValueEx"
+    |  +[1]__Name: "CreateFileA"
+    |[1]__NumImpFunctions: 2
+
+  Process2
+  +[0]__ImportedDlls
+    +[0]__Name: "evil.dll"
+    |[0]__ImpFunctions
+    |  +[0]__Name: "RegQueryValueEx"
+    |[0]__NumImpFunctions: 1
+
+  Both Process1 and Process2 match your query, as each of the indicators is
+  evaluated separately. While you wanted to express "find me processes that
+  have a DLL that has both one imported function and RegQueryValueEx is in
+  the list of imported functions", your indicator actually means "find
+  processes that have at least one DLL with one imported function and at
+  least one DLL that imports the RegQueryValueEx function".
+
+  To write such an indicator you need to specify a context of ImportedDLLs
+  for these two clauses, converting your indicator to:
+
+  Context("ImportedDLLs",
+          AndOperator(
+            Equal("ImpFunctions.Name", "RegQueryValueEx"),
+            Equal("NumImpFunctions", "1")
+          ))
+
+  Context will execute the filter specified as the second parameter for each
+  of the objects under "ImportedDLLs", thus applying the condition per DLL,
+  not per object, and returning the right result.
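+
+  In the textual filter language the same restriction is expressed with the
+  context operator '@' handled by the Parser class below; a sketch, mirroring
+  the query used in the objectfilter tests:
+
+    @imported_dlls
+    (
+      imported_functions contains "RegQueryValueEx"
+      AND num_imported_functions == 1
+    )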
+  """
+
+  def __init__(self, arguments=None, **kwargs):
+    if len(arguments) != 2:
+      raise InvalidNumberOfOperands(u'Context accepts only 2 operands.')
+    super(Context, self).__init__(arguments=arguments, **kwargs)
+    self.context, self.condition = self.args
+
+  def Matches(self, obj):
+    for object_list in self.value_expander.Expand(obj, self.context):
+      for sub_object in object_list:
+        if self.condition.Matches(sub_object):
+          return True
+    return False
+
+
+OP2FN = {
+    'equals': Equals,
+    'is': Equals,
+    '==': Equals,
+    '!=': NotEquals,
+    'contains': Contains,
+    '>': Greater,
+    '>=': GreaterEqual,
+    '<': Less,
+    '<=': LessEqual,
+    'inset': InSet,
+    'regexp': Regexp,
+    'iregexp': RegexpInsensitive}
+
+
+class ValueExpander(object):
+  """Encapsulates the logic to expand values available in an object.
+
+  Once instantiated and called, this class returns all the values that follow
+  a given field path.
+  """
+
+  FIELD_SEPARATOR = '.'
+
+  def _GetAttributeName(self, path):
+    """Returns the attribute name to fetch given a path."""
+    return path[0]
+
+  def _GetValue(self, unused_obj, unused_attr_name):
+    """Returns the value of the attribute attr_name."""
+    raise NotImplementedError()
+
+  def _AtLeaf(self, attr_value):
+    """Called when at a leaf value. Should yield a value."""
+    yield attr_value
+
+  def _AtNonLeaf(self, attr_value, path):
+    """Called when at a non-leaf value. Should recurse and yield values."""
+    try:
+      # Check for iterables first. If it's a dictionary, yield it as-is.
+      if isinstance(attr_value, dict):
+        yield attr_value
+      else:
+        # If it's any other iterable, recurse on each of its values.
+        for sub_obj in attr_value:
+          for value in self.Expand(sub_obj, path[1:]):
+            yield value
+    except TypeError:
+      # Not iterable after all, recurse with the value itself.
+      for value in self.Expand(attr_value, path[1:]):
+        yield value
+
+  def Expand(self, obj, path):
+    """Returns a list of all the values for the given path in the object obj.
+
+    Given a path such as ["sub1", "sub2"] it returns all the values available
+    in obj.sub1.sub2 as a list. sub1 and sub2 must be data attributes or
+    properties.
+
+    If sub1 returns a list of objects, or a generator, Expand aggregates the
+    values for the remaining path for each of the objects, thus returning a
+    list of all the values under the given path for the input object.
+
+    Args:
+      obj: An object that will be traversed for the given path
+      path: A list of strings
+
+    Yields:
+      The values once the object is traversed.
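+
+    Example (mirroring the test objects in objectfilter_test.py): expanding
+    the path "hash.md5" on an object whose "hash" property returns two
+    HashObject instances yields both MD5 values, i.e. '123abc' and '456def'.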
+ """ + if isinstance(path, basestring): + path = path.split(self.FIELD_SEPARATOR) + + attr_name = self._GetAttributeName(path) + attr_value = self._GetValue(obj, attr_name) + if attr_value is None: + return + + if len(path) == 1: + for value in self._AtLeaf(attr_value): + yield value + else: + for value in self._AtNonLeaf(attr_value, path): + yield value + + +class AttributeValueExpander(ValueExpander): + """An expander that gives values based on object attribute names.""" + + def _GetValue(self, obj, attr_name): + return getattr(obj, attr_name, None) + + +class LowercaseAttributeValueExpander(AttributeValueExpander): + """An expander that lowercases all attribute names before access.""" + + def _GetAttributeName(self, path): + return path[0].lower() + + +class DictValueExpander(ValueExpander): + """An expander that gets values from dictionary access to the object.""" + + def _GetValue(self, obj, attr_name): + return obj.get(attr_name, None) + + +class BasicExpression(lexer.Expression): + """Basic Expression.""" + + def __init__(self): + super(BasicExpression, self).__init__() + self.bool_value = True + + def FlipBool(self): + self.bool_value = not self.bool_value + + def Compile(self, filter_implementation): + arguments = [self.attribute] + op_str = self.operator.lower() + operator = filter_implementation.OPS.get(op_str, None) + + if not operator: + raise ParseError(u'Unknown operator {0:s} provided.'.format( + self.operator)) + + arguments.extend(self.args) + expander = filter_implementation.FILTERS['ValueExpander'] + ops = operator(arguments=arguments, value_expander=expander) + if not self.bool_value: + if hasattr(ops, 'FlipBool'): + ops.FlipBool() + + return ops + + +class ContextExpression(lexer.Expression): + """Represents the context operator.""" + + def __init__(self, attribute="", part=None): + self.attribute = attribute + self.args = [] + if part: + self.args.append(part) + super(ContextExpression, self).__init__() + + def __str__(self): + return 'Context({0:s} {1:s})'.format( + self.attribute, [str(x) for x in self.args]) + + def SetExpression(self, expression): + """Set the expression.""" + if isinstance(expression, lexer.Expression): + self.args = [expression] + else: + raise ParseError(u'Expected expression, got {0:s}.'.format(expression)) + + def Compile(self, filter_implementation): + """Compile the expression.""" + arguments = [self.attribute] + for arg in self.args: + arguments.append(arg.Compile(filter_implementation)) + expander = filter_implementation.FILTERS['ValueExpander'] + context_cls = filter_implementation.FILTERS['Context'] + return context_cls(arguments=arguments, + value_expander=expander) + + +class BinaryExpression(lexer.BinaryExpression): + def Compile(self, filter_implementation): + """Compile the binary expression into a filter object.""" + operator = self.operator.lower() + if operator == 'and' or operator == '&&': + method = 'AndFilter' + elif operator == 'or' or operator == '||': + method = 'OrFilter' + else: + raise ParseError(u'Invalid binary operator {0:s}.'.format(operator)) + + args = [x.Compile(filter_implementation) for x in self.args] + return filter_implementation.FILTERS[method](arguments=args) + + +class Parser(lexer.SearchParser): + """Parses and generates an AST for a query written in the described language. 
+ + Examples of valid syntax: + size is 40 + (name contains "Program Files" AND hash.md5 is "123abc") + @imported_modules (num_symbols = 14 AND symbol.name is "FindWindow") + """ + expression_cls = BasicExpression + binary_expression_cls = BinaryExpression + context_cls = ContextExpression + + tokens = [ + # Operators and related tokens + lexer.Token('INITIAL', r'\@[\w._0-9]+', + 'ContextOperator,PushState', 'CONTEXTOPEN'), + lexer.Token('INITIAL', r'[^\s\(\)]', 'PushState,PushBack', 'ATTRIBUTE'), + lexer.Token('INITIAL', r'\(', 'PushState,BracketOpen', None), + lexer.Token('INITIAL', r'\)', 'BracketClose', 'BINARY'), + + # Context + lexer.Token('CONTEXTOPEN', r'\(', 'BracketOpen', 'INITIAL'), + + # Double quoted string + lexer.Token('STRING', '"', 'PopState,StringFinish', None), + lexer.Token('STRING', r'\\x(..)', 'HexEscape', None), + lexer.Token('STRING', r'\\(.)', 'StringEscape', None), + lexer.Token('STRING', r'[^\\"]+', 'StringInsert', None), + + # Single quoted string + lexer.Token('SQ_STRING', '\'', 'PopState,StringFinish', None), + lexer.Token('SQ_STRING', r'\\x(..)', 'HexEscape', None), + lexer.Token('SQ_STRING', r'\\(.)', 'StringEscape', None), + lexer.Token('SQ_STRING', r'[^\\\']+', 'StringInsert', None), + + # Basic expression + lexer.Token('ATTRIBUTE', r'[\w._0-9]+', 'StoreAttribute', 'OPERATOR'), + lexer.Token('OPERATOR', r'not ', 'FlipLogic', None), + lexer.Token('OPERATOR', r'(\w+|[<>!=]=?)', 'StoreOperator', 'CHECKNOT'), + lexer.Token('CHECKNOT', r'not', 'FlipLogic', 'ARG'), + lexer.Token('CHECKNOT', r'\s+', None, None), + lexer.Token('CHECKNOT', r'([^not])', 'PushBack', 'ARG'), + lexer.Token('ARG', r'(\d+\.\d+)', 'InsertFloatArg', 'ARG'), + lexer.Token('ARG', r'(0x\d+)', 'InsertInt16Arg', 'ARG'), + lexer.Token('ARG', r'(\d+)', 'InsertIntArg', 'ARG'), + lexer.Token('ARG', '"', 'PushState,StringStart', 'STRING'), + lexer.Token('ARG', '\'', 'PushState,StringStart', 'SQ_STRING'), + # When the last parameter from arg_list has been pushed + + # State where binary operators are supported (AND, OR) + lexer.Token('BINARY', r'(?i)(and|or|\&\&|\|\|)', + 'BinaryOperator', 'INITIAL'), + # - We can also skip spaces + lexer.Token('BINARY', r'\s+', None, None), + # - But if it's not "and" or just spaces we have to go back + lexer.Token('BINARY', '.', 'PushBack,PopState', None), + + # Skip whitespace. + lexer.Token('.', r'\s+', None, None), + ] + + def StoreAttribute(self, string='', **kwargs): + self.flipped = False + super(Parser, self).StoreAttribute(string, **kwargs) + + def FlipAllowed(self): + """Raise an error if the not keyword is used where it is not allowed.""" + if not hasattr(self, 'flipped'): + raise ParseError(u'Not defined.') + + if not self.flipped: + return + + if self.current_expression.operator: + if not self.current_expression.operator.lower() in ( + 'is', 'contains', 'inset', 'equals'): + raise ParseError( + u'Keyword \'not\' does not work against operator: {0:s}'.format( + self.current_expression.operator)) + + def FlipLogic(self, **unused_kwargs): + """Flip the boolean logic of the expression. + + If an expression is configured to return True when the condition + is met this logic will flip that to False, and vice versa. + """ + if hasattr(self, 'flipped') and self.flipped: + raise ParseError(u'The operator \'not\' can only be expressed once.') + + if self.current_expression.args: + raise ParseError( + u'Unable to place the keyword \'not\' after an argument.') + + self.flipped = True + + # Check if this flip operation should be allowed. 
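+    # For example, in the query "size not contains 3" (exercised in the
+    # tests) the 'not' keyword is consumed here: the expression keeps its
+    # 'contains' operator but its boolean value is flipped, so the compiled
+    # filter matches exactly when no expanded value contains 3.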
+ self.FlipAllowed() + + if hasattr(self.current_expression, 'FlipBool'): + self.current_expression.FlipBool() + logging.debug(u'Negative matching [flipping boolean logic].') + else: + logging.warning( + u'Unable to perform a negative match, issuing a positive one.') + + def InsertArg(self, string='', **unused_kwargs): + """Insert an arg to the current expression.""" + # Note that "string" is not necessarily of type string. + logging.debug(u'Storing argument: {0!s}'.format(string)) + + # Check if this flip operation should be allowed. + self.FlipAllowed() + + # This expression is complete + if self.current_expression.AddArg(string): + self.stack.append(self.current_expression) + self.current_expression = self.expression_cls() + # We go to the BINARY state, to find if there's an AND or OR operator + return 'BINARY' + + def InsertFloatArg(self, string='', **unused_kwargs): + """Inserts a Float argument.""" + try: + float_value = float(string) + except (TypeError, ValueError): + raise ParseError(u'{0:s} is not a valid float.'.format(string)) + return self.InsertArg(float_value) + + def InsertIntArg(self, string='', **unused_kwargs): + """Inserts an Integer argument.""" + try: + int_value = int(string) + except (TypeError, ValueError): + raise ParseError(u'{0:s} is not a valid integer.'.format(string)) + return self.InsertArg(int_value) + + def InsertInt16Arg(self, string='', **unused_kwargs): + """Inserts an Integer in base16 argument.""" + try: + int_value = int(string, 16) + except (TypeError, ValueError): + raise ParseError(u'{0:s} is not a valid base16 integer.'.format(string)) + return self.InsertArg(int_value) + + def StringFinish(self, **unused_kwargs): + if self.state == 'ATTRIBUTE': + return self.StoreAttribute(string=self.string) + + elif self.state == 'ARG': + return self.InsertArg(string=self.string) + + def StringEscape(self, string, match, **unused_kwargs): + """Escape backslashes found inside a string quote. + + Backslashes followed by anything other than [\'"rnbt.ws] will raise + an Error. + + Args: + string: The string that matched. + match: The match object (m.group(1) is the escaped code) + + Raises: + ParseError: When the escaped string is not one of [\'"rnbt] + """ + if match.group(1) in '\\\'"rnbt\\.ws': + self.string += string.decode('string_escape') + else: + raise ParseError(u'Invalid escape character {0:s}.'.format(string)) + + def HexEscape(self, string, match, **unused_kwargs): + """Converts a hex escaped string.""" + logging.debug(u'HexEscape matched {0:s}.'.format(string)) + hex_string = match.group(1) + try: + self.string += binascii.unhexlify(hex_string) + except TypeError: + raise ParseError(u'Invalid hex escape {0:s}.'.format(string)) + + def ContextOperator(self, string='', **unused_kwargs): + self.stack.append(self.context_cls(string[1:])) + + def Reduce(self): + """Reduce the token stack into an AST.""" + # Check for sanity + if self.state != 'INITIAL' and self.state != 'BINARY': + self.Error(u'Premature end of expression') + + length = len(self.stack) + while length > 1: + # Precendence order + self._CombineParenthesis() + self._CombineBinaryExpressions('and') + self._CombineBinaryExpressions('or') + self._CombineContext() + + # No change + if len(self.stack) == length: + break + length = len(self.stack) + + if length != 1: + self.Error(u'Illegal query expression.') + + return self.stack[0] + + def Error(self, message=None, _=None): + # Note that none of the values necessarily are strings. 
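+    # For example, the unbalanced test query "a is 3)" ends up here: the
+    # dangling closing brace cannot be reduced away, so Reduce() reports an
+    # illegal query expression along with the position in the buffer at
+    # which parsing stopped.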
+ raise ParseError(u'{0!s} in position {1!s}: {2!s} <----> {3!s} )'.format( + message, len(self.processed_buffer), self.processed_buffer, + self.buffer)) + + def _CombineBinaryExpressions(self, operator): + for i in range(1, len(self.stack)-1): + item = self.stack[i] + if (isinstance(item, lexer.BinaryExpression) and + item.operator.lower() == operator.lower() and + isinstance(self.stack[i-1], lexer.Expression) and + isinstance(self.stack[i+1], lexer.Expression)): + lhs = self.stack[i-1] + rhs = self.stack[i+1] + + self.stack[i].AddOperands(lhs, rhs) + self.stack[i-1] = None + self.stack[i+1] = None + + self.stack = filter(None, self.stack) + + def _CombineContext(self): + # Context can merge from item 0 + for i in range(len(self.stack)-1, 0, -1): + item = self.stack[i-1] + if (isinstance(item, ContextExpression) and + isinstance(self.stack[i], lexer.Expression)): + expression = self.stack[i] + self.stack[i-1].SetExpression(expression) + self.stack[i] = None + + self.stack = filter(None, self.stack) + + +### FILTER IMPLEMENTATIONS +class BaseFilterImplementation(object): + """Defines the base implementation of an object filter by its attributes. + + Inherit from this class, switch any of the needed operators and pass it to + the Compile method of a parsed string to obtain an executable filter. + """ + + OPS = OP2FN + FILTERS = { + 'ValueExpander': AttributeValueExpander, + 'AndFilter': AndFilter, + 'OrFilter': OrFilter, + 'IdentityFilter': IdentityFilter, + 'Context': Context} + + +class LowercaseAttributeFilterImplementation(BaseFilterImplementation): + """Does field name access on the lowercase version of names. + + Useful to only access attributes and properties with Google's python naming + style. + """ + + FILTERS = {} + FILTERS.update(BaseFilterImplementation.FILTERS) + FILTERS.update({'ValueExpander': LowercaseAttributeValueExpander}) + + +class DictFilterImplementation(BaseFilterImplementation): + """Does value fetching by dictionary access on the object.""" + + FILTERS = {} + FILTERS.update(BaseFilterImplementation.FILTERS) + FILTERS.update({'ValueExpander': DictValueExpander}) + + diff --git a/plaso/lib/objectfilter_test.py b/plaso/lib/objectfilter_test.py new file mode 100644 index 0000000..a4c3761 --- /dev/null +++ b/plaso/lib/objectfilter_test.py @@ -0,0 +1,519 @@ +#!/usr/bin/env python +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains tests for the object filter.""" + +import unittest + +from plaso.lib import objectfilter + + +class DummyObject(object): + def __init__(self, key, value): + setattr(self, key, value) + + +class HashObject(object): + def __init__(self, hash_value=None): + self.value = hash_value + + @property + def md5(self): + return self.value + + def __eq__(self, y): + return self.value == y + + def __lt__(self, y): + return self.value < y + + +class Dll(object): + def __init__(self, name, imported_functions=None, exported_functions=None): + self.name = name + self._imported_functions = imported_functions or [] + self.num_imported_functions = len(self._imported_functions) + self.exported_functions = exported_functions or [] + self.num_exported_functions = len(self.exported_functions) + + @property + def imported_functions(self): + for fn in self._imported_functions: + yield fn + + +class DummyFile(object): + _FILENAME = 'boot.ini' + + ATTR1 = 'Backup' + ATTR2 = 'Archive' + HASH1 = '123abc' + HASH2 = '456def' + + non_callable_leaf = 'yoda' + + def __init__(self): + self.non_callable = HashObject(self.HASH1) + self.non_callable_repeated = [ + DummyObject('desmond', ['brotha', 'brotha']), + DummyObject('desmond', ['brotha', 'sista'])] + self.imported_dll1 = Dll('a.dll', ['FindWindow', 'CreateFileA']) + self.imported_dll2 = Dll('b.dll', ['RegQueryValueEx']) + + @property + def name(self): + return self._FILENAME + + @property + def attributes(self): + return [self.ATTR1, self.ATTR2] + + @property + def hash(self): + return [HashObject(self.HASH1), HashObject(self.HASH2)] + + @property + def size(self): + return 10 + + @property + def deferred_values(self): + for v in ['a', 'b']: + yield v + + @property + def novalues(self): + return [] + + @property + def imported_dlls(self): + return [self.imported_dll1, self.imported_dll2] + + def Callable(self): + raise RuntimeError(u'This can not be called.') + + @property + def float(self): + return 123.9823 + + +class ObjectFilterTest(unittest.TestCase): + def setUp(self): + self.file = DummyFile() + self.filter_imp = objectfilter.LowercaseAttributeFilterImplementation + self.value_expander = self.filter_imp.FILTERS['ValueExpander'] + + operator_tests = { + objectfilter.Less: [ + (True, ['size', 1000]), + (True, ['size', 11]), + (False, ['size', 10]), + (False, ['size', 0]), + (False, ['float', 1.0]), + (True, ['float', 123.9824])], + objectfilter.LessEqual: [ + (True, ['size', 1000]), + (True, ['size', 11]), + (True, ['size', 10]), + (False, ['size', 9]), + (False, ['float', 1.0]), + (True, ['float', 123.9823])], + objectfilter.Greater: [ + (True, ['size', 1]), + (True, ['size', 9.23]), + (False, ['size', 10]), + (False, ['size', 1000]), + (True, ['float', 122]), + (True, ['float', 1.0])], + objectfilter.GreaterEqual: [ + (False, ['size', 1000]), + (False, ['size', 11]), + (True, ['size', 10]), + (True, ['size', 0]), + # Floats work fine too. + (True, ['float', 122]), + (True, ['float', 123.9823]), + # Comparisons works with strings, although it might be a bit silly. + (True, ['name', 'aoot.ini'])], + objectfilter.Contains: [ + # Contains works with strings. + (True, ['name', 'boot.ini']), + (True, ['name', 'boot']), + (False, ['name', 'meh']), + # Works with generators. + (True, ['imported_dlls.imported_functions', 'FindWindow']), + # But not with numbers. 
+ (False, ['size', 12])], + objectfilter.Equals: [ + (True, ['name', 'boot.ini']), + (False, ['name', 'foobar']), + (True, ['float', 123.9823])], + objectfilter.NotEquals: [ + (False, ['name', 'boot.ini']), + (True, ['name', 'foobar']), + (True, ['float', 25])], + objectfilter.InSet: [ + (True, ['name', ['boot.ini', 'autoexec.bat']]), + (True, ['name', 'boot.ini']), + (False, ['name', 'NOPE']), + # All values of attributes are within these. + (True, ['attributes', ['Archive', 'Backup', 'Nonexisting']]), + # Not all values of attributes are within these. + (False, ['attributes', ['Executable', 'Sparse']])], + objectfilter.Regexp: [ + (True, ['name', '^boot.ini$']), + (True, ['name', 'boot.ini']), + (False, ['name', '^$']), + (True, ['attributes', 'Archive']), + # One can regexp numbers if he's inclined to. + (True, ['size', 0]), + # But regexp doesn't work with lists or generators for the moment. + (False, ['imported_dlls.imported_functions', 'FindWindow'])], + } + + def testBinaryOperators(self): + for operator, test_data in self.operator_tests.items(): + for test_unit in test_data: + # TODO: why is there a print statement here? + print (u'Testing {0:s} with {1!s} and {2!s}'.format( + operator, test_unit[0], test_unit[1])) + kwargs = {'arguments': test_unit[1], + 'value_expander': self.value_expander} + ops = operator(**kwargs) + self.assertEqual(test_unit[0], ops.Matches(self.file)) + if hasattr(ops, 'FlipBool'): + ops.FlipBool() + # TODO: why is there a print statement here? + print u'Testing negative matching.' + self.assertEqual(not test_unit[0], ops.Matches(self.file)) + + def testExpand(self): + # Case insensitivity. + values_lowercase = self.value_expander().Expand(self.file, 'size') + values_uppercase = self.value_expander().Expand(self.file, 'Size') + self.assertListEqual(list(values_lowercase), list(values_uppercase)) + + # Existing, non-repeated, leaf is a value. + values = self.value_expander().Expand(self.file, 'size') + self.assertListEqual(list(values), [10]) + + # Existing, non-repeated, leaf is iterable. + values = self.value_expander().Expand(self.file, 'attributes') + self.assertListEqual(list(values), [[DummyFile.ATTR1, DummyFile.ATTR2]]) + + # Existing, repeated, leaf is value. + values = self.value_expander().Expand(self.file, 'hash.md5') + self.assertListEqual(list(values), [DummyFile.HASH1, DummyFile.HASH2]) + + # Existing, repeated, leaf is iterable. + values = self.value_expander().Expand( + self.file, 'non_callable_repeated.desmond') + self.assertListEqual( + list(values), [['brotha', 'brotha'], ['brotha', 'sista']]) + + # Now with an iterator. + values = self.value_expander().Expand(self.file, 'deferred_values') + self.assertListEqual([list(value) for value in values], [['a', 'b']]) + + # Iterator > generator. + values = self.value_expander().Expand( + self.file, 'imported_dlls.imported_functions') + expected = [['FindWindow', 'CreateFileA'], ['RegQueryValueEx']] + self.assertListEqual([list(value) for value in values], expected) + + # Non-existing first path. + values = self.value_expander().Expand(self.file, 'nonexistant') + self.assertListEqual(list(values), []) + + # Non-existing in the middle. + values = self.value_expander().Expand(self.file, 'hash.mink.boo') + self.assertListEqual(list(values), []) + + # Non-existing as a leaf. + values = self.value_expander().Expand(self.file, 'hash.mink') + self.assertListEqual(list(values), []) + + # Non-callable leaf. 
+ values = self.value_expander().Expand(self.file, 'non_callable_leaf') + self.assertListEqual(list(values), [DummyFile.non_callable_leaf]) + + # callable. + values = self.value_expander().Expand(self.file, 'Callable') + self.assertListEqual(list(values), []) + + # leaf under a callable. Will return nothing. + values = self.value_expander().Expand(self.file, 'Callable.a') + self.assertListEqual(list(values), []) + + def testGenericBinaryOperator(self): + class TestBinaryOperator(objectfilter.GenericBinaryOperator): + values = list() + + def Operation(self, x, _): + return self.values.append(x) + + # Test a common binary operator. + tbo = TestBinaryOperator( + arguments=['whatever', 0], value_expander=self.value_expander) + self.assertEqual(tbo.right_operand, 0) + self.assertEqual(tbo.args[0], 'whatever') + tbo.Matches(DummyObject('whatever', 'id')) + tbo.Matches(DummyObject('whatever', 'id2')) + tbo.Matches(DummyObject('whatever', 'bg')) + tbo.Matches(DummyObject('whatever', 'bg2')) + self.assertListEqual(tbo.values, ['id', 'id2', 'bg', 'bg2']) + + def testContext(self): + self.assertRaises( + objectfilter.InvalidNumberOfOperands, objectfilter.Context, + arguments=['context'], value_expander=self.value_expander) + + self.assertRaises( + objectfilter.InvalidNumberOfOperands, objectfilter.Context, + arguments=[ + 'context', objectfilter.Equals( + arguments=['path', 'value'], + value_expander=self.value_expander), + objectfilter.Equals( + arguments=['another_path', 'value'], + value_expander=self.value_expander)], + value_expander=self.value_expander) + + # One imported_dll imports 2 functions AND one imported_dll imports + # function RegQueryValueEx. + arguments = [ + objectfilter.Equals( + arguments=['imported_dlls.num_imported_functions', 1], + value_expander=self.value_expander), + objectfilter.Contains( + arguments=['imported_dlls.imported_functions', + 'RegQueryValueEx'], + value_expander=self.value_expander)] + condition = objectfilter.AndFilter(arguments=arguments) + # Without context, it matches because both filters match separately. + self.assertEqual(True, condition.Matches(self.file)) + + arguments = [ + objectfilter.Equals( + arguments=['num_imported_functions', 2], + value_expander=self.value_expander), + objectfilter.Contains( + arguments=['imported_functions', 'RegQueryValueEx'], + value_expander=self.value_expander)] + condition = objectfilter.AndFilter(arguments=arguments) + # The same DLL imports 2 functions AND one of these is RegQueryValueEx. + context = objectfilter.Context(arguments=['imported_dlls', condition], + value_expander=self.value_expander) + # With context, it doesn't match because both don't match in the same dll. + self.assertEqual(False, context.Matches(self.file)) + + # One imported_dll imports only 1 function AND one imported_dll imports + # function RegQueryValueEx. + condition = objectfilter.AndFilter(arguments=[ + objectfilter.Equals( + arguments=['num_imported_functions', 1], + value_expander=self.value_expander), + objectfilter.Contains( + arguments=['imported_functions', 'RegQueryValueEx'], + value_expander=self.value_expander)]) + # The same DLL imports 1 function AND it's RegQueryValueEx. + context = objectfilter.Context(['imported_dlls', condition], + value_expander=self.value_expander) + self.assertEqual(True, context.Matches(self.file)) + + # Now test the context with a straight query. 
+ query = u'\n'.join([ + '@imported_dlls', + '(', + ' imported_functions contains "RegQueryValueEx"', + ' AND num_imported_functions == 1', + ')']) + + filter_ = objectfilter.Parser(query).Parse() + filter_ = filter_.Compile(self.filter_imp) + self.assertEqual(True, filter_.Matches(self.file)) + + def testRegexpRaises(self): + with self.assertRaises(ValueError): + objectfilter.Regexp( + arguments=['name', 'I [dont compile'], + value_expander=self.value_expander) + + def testEscaping(self): + parser = objectfilter.Parser(r'a is "\n"').Parse() + self.assertEqual(parser.args[0], '\n') + # Invalid escape sequence. + parser = objectfilter.Parser(r'a is "\z"') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + # Can escape the backslash. + parser = objectfilter.Parser(r'a is "\\"').Parse() + self.assertEqual(parser.args[0], '\\') + + # Test hexadecimal escaping. + + # This fails as it's not really a hex escaped string. + parser = objectfilter.Parser(r'a is "\xJZ"') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + # Instead, this is what one should write. + parser = objectfilter.Parser(r'a is "\\xJZ"').Parse() + self.assertEqual(parser.args[0], r'\xJZ') + # Standard hex-escape. + parser = objectfilter.Parser(r'a is "\x41\x41\x41"').Parse() + self.assertEqual(parser.args[0], 'AAA') + # Hex-escape + a character. + parser = objectfilter.Parser(r'a is "\x414"').Parse() + self.assertEqual(parser.args[0], r'A4') + # How to include r'\x41'. + parser = objectfilter.Parser(r'a is "\\x41"').Parse() + self.assertEqual(parser.args[0], r'\x41') + + def testParse(self): + # Arguments are either int, float or quoted string. + objectfilter.Parser('attribute == 1').Parse() + objectfilter.Parser('attribute == 0x10').Parse() + parser = objectfilter.Parser('attribute == 1a') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + objectfilter.Parser('attribute == 1.2').Parse() + objectfilter.Parser('attribute == \'bla\'').Parse() + objectfilter.Parser('attribute == "bla"').Parse() + parser = objectfilter.Parser('something == red') + self.assertRaises(objectfilter.ParseError, parser.Parse) + + # Can't start with AND. + parser = objectfilter.Parser('and something is \'Blue\'') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + # Test negative filters. + parser = objectfilter.Parser('attribute not == \'dancer\'') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + parser = objectfilter.Parser('attribute == not \'dancer\'') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + parser = objectfilter.Parser('attribute not not equals \'dancer\'') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + parser = objectfilter.Parser('attribute not > 23') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + # Need to close braces. + objectfilter.Parser('(a is 3)').Parse() + parser = objectfilter.Parser('(a is 3') + self.assertRaises(objectfilter.ParseError, parser.Parse) + # Need to open braces to close them. + parser = objectfilter.Parser('a is 3)') + self.assertRaises(objectfilter.ParseError, parser.Parse) + + # Context Operator alone is not accepted. + parser = objectfilter.Parser('@attributes') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + # Accepted only with braces. + objectfilter.Parser('@attributes( name is \'adrien\')').Parse() + # Not without them. 
+ parser = objectfilter.Parser('@attributes name is \'adrien\'') + with self.assertRaises(objectfilter.ParseError): + parser.Parse() + + # Can nest context operators. + query = '@imported_dlls( @imported_function( name is \'OpenFileA\'))' + objectfilter.Parser(query).Parse() + # Can nest context operators and mix braces without it messing up. + query = '@imported_dlls( @imported_function( name is \'OpenFileA\'))' + parser = objectfilter.Parser(query).Parse() + query = u'\n'.join([ + '@imported_dlls', + '(', + ' @imported_function', + ' (', + ' name is "OpenFileA" and ordinal == 12', + ' )', + ')']) + + parser = objectfilter.Parser(query).Parse() + # Mix context and binary operators. + query = u'\n'.join([ + '@imported_dlls', + '(', + ' @imported_function', + ' (', + ' name is "OpenFileA"', + ' ) AND num_functions == 2', + ')']) + + parser = objectfilter.Parser(query).Parse() + # Also on the right. + query = u'\n'.join([ + '@imported_dlls', + '(', + ' num_functions == 2 AND', + ' @imported_function', + ' (', + ' name is "OpenFileA"', + ' )', + ')']) + + # Altogether. + # There's an imported dll that imports OpenFileA AND + # an imported DLL matching advapi32.dll that imports RegQueryValueExA AND + # and it exports a symbol called 'inject'. + query = u'\n'.join([ + '@imported_dlls( @imported_function ( name is "OpenFileA" ) )', + 'AND', + '@imported_dlls (', + ' name regexp "(?i)advapi32.dll"', + ' AND @imported_function ( name is "RegQueryValueEx" )', + ')', + 'AND @exported_symbols(name is "inject")']) + + def testCompile(self): + obj = DummyObject('something', 'Blue') + parser = objectfilter.Parser('something == \'Blue\'').Parse() + filter_ = parser.Compile(self.filter_imp) + self.assertEqual(filter_.Matches(obj), True) + parser = objectfilter.Parser('something == \'Red\'').Parse() + filter_ = parser.Compile(self.filter_imp) + self.assertEqual(filter_.Matches(obj), False) + parser = objectfilter.Parser('something == "Red"').Parse() + filter_ = parser.Compile(self.filter_imp) + self.assertEqual(filter_.Matches(obj), False) + obj = DummyObject('size', 4) + parser = objectfilter.Parser('size < 3').Parse() + filter_ = parser.Compile(self.filter_imp) + self.assertEqual(filter_.Matches(obj), False) + parser = objectfilter.Parser('size == 4').Parse() + filter_ = parser.Compile(self.filter_imp) + self.assertEqual(filter_.Matches(obj), True) + query = 'something is \'Blue\' and size not contains 3' + parser = objectfilter.Parser(query).Parse() + filter_ = parser.Compile(self.filter_imp) + self.assertEqual(filter_.Matches(obj), False) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/lib/output.py b/plaso/lib/output.py new file mode 100644 index 0000000..6782299 --- /dev/null +++ b/plaso/lib/output.py @@ -0,0 +1,394 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the interface for output parsing of plaso. + +The default output or storage mechanism of Plaso is not in a human +readable format. There needs to be a way to define the output in such +a way. + +After the timeline is collected and stored another tool can read, filter, +sort and process the output inside the storage, and send each processed +entry to an output formatter that takes care of parsing the output into +a human readable format for easy human consumption/analysis. + +""" + +import abc +import logging +import sys + +from plaso.lib import errors +from plaso.lib import registry +from plaso.lib import utils + +import pytz + + +class LogOutputFormatter(object): + """A base class for formatting output produced by plaso. + + This class exists mostly for documentation purposes. Subclasses should + override the relevant methods to act on the callbacks. + """ + + __metaclass__ = registry.MetaclassRegistry + __abstract = True + + # Optional arguments to be added to the argument parser. + # An example would be: + # ARGUMENTS = [('--myparameter', { + # 'action': 'store', + # 'help': 'This is my parameter help', + # 'dest': 'myparameter', + # 'default': '', + # 'type': 'unicode'})] + # + # Where all arguments into the dict object have a direct translation + # into the argparse parser. + ARGUMENTS = [] + + def __init__(self, store, filehandle=sys.stdout, config=None, + filter_use=None): + """Constructor for the output module. + + Args: + store: A StorageFile object that defines the storage. + filehandle: A file-like object that can be written to. + config: The configuration object, containing config information. + filter_use: A filter_interface.FilterObject object. + """ + zone = getattr(config, 'timezone', 'UTC') + try: + self.zone = pytz.timezone(zone) + except pytz.UnknownTimeZoneError: + logging.warning(u'Unkown timezone: {0:s} defaulting to: UTC'.format( + zone)) + self.zone = pytz.utc + + self.filehandle = filehandle + self.store = store + self._filter = filter_use + self._config = config + + self.encoding = getattr(config, 'preferred_encoding', 'utf-8') + + # TODO: this function seems to be only called with the default arguments, + # so refactor this function away. + def FetchEntry(self, store_number=-1, store_index=-1): + """Fetches an entry from the storage. + + Fetches the next entry in the storage file, except if location + is explicitly indicated. + + Args: + store_number: The store number if explicit location is to be read. + store_index: The index into the store, if explicit location is to be + read. + + Returns: + An EventObject, either the next one or from a specific location. + """ + if store_number > 0: + return self.store.GetEventObject(store_number, store_index) + else: + return self.store.GetSortedEntry() + + def WriteEvent(self, evt): + """Write the output of a single entry to the output filehandle. + + This method takes care of actually outputting each event in + question. It does so by first prepending it with potential + start of event, then processes the main body before appending + a potential end of event. + + Args: + evt: An EventObject, defined in the event library. + """ + self.StartEvent() + self.EventBody(evt) + self.EndEvent() + + @abc.abstractmethod + def EventBody(self, evt): + """Writes the main body of an event to the output filehandle. + + Args: + evt: An EventObject, defined in the event library. + + Raises: + NotImplementedError: When not implemented. 
+ """ + + def StartEvent(self): + """This should be extended by specific implementations. + + This method does all preprocessing or output before each event + is printed, for instance to surround XML events with tags, etc. + """ + pass + + def EndEvent(self): + """This should be extended by specific implementations. + + This method does all the post-processing or output after + each event has been printed, such as closing XML tags, etc. + """ + pass + + def Start(self): + """This should be extended by specific implementations. + + Depending on the file format of the output it may need + a header. This method should return a header if one is + defined in that output format. + """ + pass + + def End(self): + """This should be extended by specific implementations. + + Depending on the file format of the output it may need + a footer. This method should return a footer if one is + defined in that output format. + """ + pass + + +# Need to suppress this since these classes do not implement the +# abstract method EventBody, classes that inherit from one of these +# classes need to implement that function. +# pylint: disable=abstract-method +class FileLogOutputFormatter(LogOutputFormatter): + """A simple file based output formatter.""" + + __abstract = True + + def __init__(self, store, filehandle=sys.stdout, config=None, + filter_use=None): + """Set up the formatter.""" + super(FileLogOutputFormatter, self).__init__( + store, filehandle, config, filter_use) + if isinstance(filehandle, basestring): + open_filehandle = open(filehandle, 'wb') + elif hasattr(filehandle, 'write'): + open_filehandle = filehandle + else: + raise IOError( + u'Unable to determine how to use filehandle passed in: {}'.format( + type(filehandle))) + + self.filehandle = OutputFilehandle(self.encoding) + self.filehandle.Open(open_filehandle) + + def End(self): + """Close the open filehandle after the last output.""" + super(FileLogOutputFormatter, self).End() + self.filehandle.Close() + + +class EventBuffer(object): + """Buffer class for EventObject output processing.""" + + MERGE_ATTRIBUTES = ['inode', 'filename', 'display_name'] + + def __init__(self, formatter, check_dedups=True): + """Initialize the EventBuffer. + + This class is used for buffering up events for duplicate removals + and for other post-processing/analysis of events before being presented + by the appropriate output module. + + Args: + formatter: An OutputFormatter object. + check_dedups: Boolean value indicating whether or not the buffer should + check and merge duplicate entries or not. + """ + self._buffer_dict = {} + self._current_timestamp = 0 + self.duplicate_counter = 0 + self.check_dedups = check_dedups + + self.formatter = formatter + self.formatter.Start() + + def Append(self, event_object): + """Append an EventObject into the processing pipeline. + + Args: + event_object: The EventObject that is being added. 
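+
+    Example usage (a sketch; since the class implements __enter__ and
+    __exit__ it can also be driven by a "with" statement):
+
+      event_buffer = EventBuffer(formatter)
+      for event_object in sorted_events:
+        event_buffer.Append(event_object)
+      event_buffer.End()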
+ """ + if not self.check_dedups: + self.formatter.WriteEvent(event_object) + return + + if event_object.timestamp != self._current_timestamp: + self._current_timestamp = event_object.timestamp + self.Flush() + + key = event_object.EqualityString() + if key in self._buffer_dict: + self.JoinEvents(event_object, self._buffer_dict.pop(key)) + self._buffer_dict[key] = event_object + + def Flush(self): + """Flushes the buffer by sending records to a formatter and prints.""" + if not self._buffer_dict: + return + + for event_object in self._buffer_dict.values(): + try: + self.formatter.WriteEvent(event_object) + except errors.WrongFormatter as exception: + logging.error(u'Unable to write event: {:s}'.format(exception)) + + self._buffer_dict = {} + + def JoinEvents(self, event_a, event_b): + """Join this EventObject with another one.""" + self.duplicate_counter += 1 + # TODO: Currently we are using the first event pathspec, perhaps that + # is not the best approach. There is no need to have all the pathspecs + # inside the combined event, however which one should be chosen is + # perhaps something that can be evaluated here (regular TSK in favor of + # an event stored deep inside a VSS for instance). + for attr in self.MERGE_ATTRIBUTES: + val_a = set(utils.GetUnicodeString(getattr(event_a, attr, '')).split(';')) + val_b = set(utils.GetUnicodeString(getattr(event_b, attr, '')).split(';')) + values_list = list(val_a | val_b) + values_list.sort() # keeping this consistent across runs helps with diffs + setattr(event_a, attr, u';'.join(values_list)) + + # Special instance if this is a filestat entry we need to combine the + # description field. + if getattr(event_a, 'parser', u'') == 'filestat': + description_a = set(getattr(event_a, 'timestamp_desc', u'').split(';')) + description_b = set(getattr(event_b, 'timestamp_desc', u'').split(';')) + descriptions = list(description_a | description_b) + descriptions.sort() + if event_b.timestamp_desc not in event_a.timestamp_desc: + setattr(event_a, 'timestamp_desc', u';'.join(descriptions)) + + def End(self): + """Call the formatter to produce the closing line.""" + self.Flush() + + if self.formatter: + self.formatter.End() + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make usable with "with" statement.""" + self.End() + + def __enter__(self): + """Make usable with "with" statement.""" + return self + + +class OutputFilehandle(object): + """A simple wrapper for filehandles to make character encoding easier. + + All data is stored as an unicode text internally. However there are some + issues with clients that try to output unicode text to a non-unicode terminal. + Therefore a wrapper is created that checks if we are writing to a file, thus + using the default unicode encoding or if the attempt is to write to the + terminal, for which the default encoding of that terminal is used to encode + the text (if possible). + """ + + DEFAULT_ENCODING = 'utf-8' + + def __init__(self, encoding='utf-8'): + """Initialize the output file handler. + + Args: + encoding: The default terminal encoding, only used if attempted to write + to the terminal. + """ + self._filehandle = None + self._encoding = encoding + # An attribute stating whether or not this is STDOUT. + self._standard_out = False + + def Open(self, filehandle=sys.stdout, path=''): + """Open a filehandle to an output file. + + Args: + filehandle: A file-like-object that is used to write data to. 
+      path: If a file-like object is not passed in, it is possible to pass in
+          a path to a file, and a file-like object will be created.
+    """
+    if path:
+      self._filehandle = open(path, 'wb')
+    else:
+      self._filehandle = filehandle
+
+    if not hasattr(self._filehandle, 'name'):
+      self._standard_out = True
+    # sys.stdout reports its name as '<stdout>'.
+    elif self._filehandle.name.startswith('<stdout>'):
+      self._standard_out = True
+
+  def WriteLine(self, line):
+    """Write a single line to the supplied filehandle."""
+    if not self._filehandle:
+      return
+
+    if self._standard_out:
+      # Write using the preferred user encoding.
+      try:
+        self._filehandle.write(line.encode(self._encoding))
+      except UnicodeEncodeError:
+        logging.error(
+            u'Unable to properly write logline, save output to a file to '
+            u'prevent missing data.')
+        self._filehandle.write(line.encode(self._encoding, 'ignore'))
+
+    else:
+      # Write to a file, use the default unicode encoding.
+      self._filehandle.write(line.encode(self.DEFAULT_ENCODING))
+
+  def Close(self):
+    """Close the filehandle, if applicable."""
+    if self._filehandle and not self._standard_out:
+      self._filehandle.close()
+
+  def __exit__(self, unused_type, unused_value, unused_traceback):
+    """Make usable with "with" statement."""
+    self.Close()
+
+  def __enter__(self):
+    """Make usable with "with" statement."""
+    return self
+
+
+def GetOutputFormatter(output_string):
+  """Return an output formatter that matches the provided string."""
+  # Format the output string (make the input case insensitive).
+  if type(output_string) not in (str, unicode):
+    return None
+
+  format_str = ''.join(
+      [output_string[0].upper(), output_string[1:].lower()])
+  return LogOutputFormatter.classes.get(format_str, None)
+
+
+def ListOutputFormatters():
+  """Generate a list of all available output formatters."""
+  for cl in LogOutputFormatter.classes:
+    formatter_class = LogOutputFormatter.classes[cl](None)
+    doc_string, _, _ = formatter_class.__doc__.partition('\n')
+    yield cl, doc_string
diff --git a/plaso/lib/output_test.py b/plaso/lib/output_test.py
new file mode 100644
index 0000000..b944d72
--- /dev/null
+++ b/plaso/lib/output_test.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains tests for the output formatter.""" +import os +import locale +import sys +import tempfile +import unittest + +from plaso.lib import output + + +class DummyEvent(object): + """Simple class that defines a dummy event.""" + + def __init__(self, timestamp, entry): + self.date = u'03/01/2012' + try: + self.timestamp = int(timestamp) + except ValueError: + self.timestamp = 0 + self.entry = entry + def EqualityString(self): + return u';'.join(map(str, [self.timestamp, self.entry])) + + +class TestOutput(output.LogOutputFormatter): + """This is a test output module that provides a simple XML.""" + + def __init__(self, filehandle): + """Fake the store.""" + super(TestOutput, self).__init__(store=None, filehandle=filehandle) + + def StartEvent(self): + self.filehandle.write(u'\n') + + def EventBody(self, event_object): + self.filehandle.write(( + u'\t{0:s}\n\t\n' + u'\t{2:s}\n').format( + event_object.date, event_object.timestamp, event_object.entry)) + + def EndEvent(self): + self.filehandle.write(u'\n') + + def FetchEntry(self, **_): + pass + + def Start(self): + self.filehandle.write(u'\n') + + def End(self): + self.filehandle.write(u'\n') + + +class PlasoOutputUnitTest(unittest.TestCase): + """The unit test for plaso output formatting.""" + + def testOutput(self): + """Test a test implementation of the output formatter.""" + events = [DummyEvent(123456, u'My Event Is Now!'), + DummyEvent(123458, u'There is no tomorrow.'), + DummyEvent(123462, u'Tomorrow is now.'), + DummyEvent(123489, u'This is just some stuff to fill the line.')] + + lines = [] + with tempfile.NamedTemporaryFile() as fh: + formatter = TestOutput(fh) + formatter.Start() + for event_object in events: + formatter.WriteEvent(event_object) + formatter.End() + + fh.seek(0) + for line in fh: + lines.append(line) + + self.assertEquals(len(lines), 22) + self.assertEquals(lines[0], u'\n') + self.assertEquals(lines[1], u'\n') + self.assertEquals(lines[2], u'\t03/01/2012\n') + self.assertEquals(lines[3], u'\t\n') + self.assertEquals(lines[4], u'\tMy Event Is Now!\n') + self.assertEquals(lines[5], u'\n') + self.assertEquals(lines[6], u'\n') + self.assertEquals(lines[7], u'\t03/01/2012\n') + self.assertEquals(lines[8], u'\t\n') + self.assertEquals(lines[9], u'\tThere is no tomorrow.\n') + self.assertEquals(lines[10], u'\n') + self.assertEquals(lines[11], u'\n') + self.assertEquals(lines[-1], u'\n') + + def testOutputList(self): + """Test listing up all available registered modules.""" + module_seen = False + for name, description in output.ListOutputFormatters(): + if 'TestOutput' in name: + module_seen = True + self.assertEquals(description, ( + u'This is a test output module that provides a simple XML.')) + + self.assertTrue(module_seen) + + +class EventBufferTest(unittest.TestCase): + """Few unit tests for the EventBuffer class.""" + + def testFlush(self): + """Test to ensure we empty our buffers and sends to output properly.""" + with tempfile.NamedTemporaryFile() as fh: + + def CheckBufferLength(event_buffer, expected): + if not event_buffer.check_dedups: + expected = 0 + # pylint: disable=protected-access + self.assertEquals(len(event_buffer._buffer_dict), expected) + + formatter = TestOutput(fh) + event_buffer = output.EventBuffer(formatter, False) + + event_buffer.Append(DummyEvent(123456, u'Now is now')) + CheckBufferLength(event_buffer, 1) + + # Add three events. 
+      event_buffer.Append(DummyEvent(123456, u'OMG I AM DIFFERENT'))
+      event_buffer.Append(DummyEvent(123456, u'Now is now'))
+      event_buffer.Append(DummyEvent(123456, u'Now is now'))
+      CheckBufferLength(event_buffer, 2)
+
+      event_buffer.Flush()
+      CheckBufferLength(event_buffer, 0)
+
+      event_buffer.Append(DummyEvent(123456, u'Now is now'))
+      event_buffer.Append(DummyEvent(123456, u'Now is now'))
+      event_buffer.Append(DummyEvent(123456, u'Different again :)'))
+      CheckBufferLength(event_buffer, 2)
+      event_buffer.Append(DummyEvent(123457, u'Now is different'))
+      CheckBufferLength(event_buffer, 1)
+
+
+class OutputFilehandleTest(unittest.TestCase):
+  """A few unit tests for the OutputFilehandle."""
+
+  def setUp(self):
+    self.preferred_encoding = locale.getpreferredencoding()
+
+  def _GetLine(self):
+    # Time, Þorri allra landsmanna hlýddu á atburðinn.
+    return ('Time, \xc3\x9eorri allra landsmanna hl\xc3\xbdddu \xc3\xa1 '
+            'atbur\xc3\xb0inn.\n').decode('utf-8')
+
+  def testFilePath(self):
+    temp_path = ''
+    with tempfile.NamedTemporaryFile(delete=True) as temp_file:
+      temp_path = temp_file.name
+
+    with output.OutputFilehandle(self.preferred_encoding) as fh:
+      fh.Open(path=temp_path)
+      fh.WriteLine(self._GetLine())
+
+    line_read = u''
+    with open(temp_path, 'rb') as output_file:
+      line_read = output_file.read()
+
+    os.remove(temp_path)
+    self.assertEquals(line_read, self._GetLine().encode('utf-8'))
+
+  def testStdOut(self):
+    with output.OutputFilehandle(self.preferred_encoding) as fh:
+      fh.Open(sys.stdout)
+      try:
+        fh.WriteLine(self._GetLine())
+        self.assertTrue(True)
+      except (UnicodeEncodeError, UnicodeDecodeError):
+        self.assertTrue(False)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/lib/pfilter.py b/plaso/lib/pfilter.py
new file mode 100644
index 0000000..736e749
--- /dev/null
+++ b/plaso/lib/pfilter.py
@@ -0,0 +1,455 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""An extension of the objectfilter to provide plaso specific options."""
+
+import datetime
+import logging
+
+from plaso.formatters import manager as formatters_manager
+
+# TODO: Change this so it becomes an attribute instead of having the backend
+# load a front-end library.
+from plaso.frontend import presets
+
+from plaso.lib import limit
+from plaso.lib import objectfilter
+from plaso.lib import timelib
+from plaso.lib import utils
+
+
+class DictObject(object):
+  """A simple object representing a dict object.
+
+  To filter against an object that is stored as a dictionary the dict
+  is converted into a simple object. Since keys can contain spaces
+  and/or other symbols, those are stripped out of the key to make
+  filtering work as if it were just another object.
+
+  Example dict:
+    {'A value': 234,
+     'this (my) key_': 'value',
+     'random': True,
+    }
+
+  This object would then allow access to object.thismykey that would access
+  the key 'this (my) key_' inside the dict.
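+  Similarly object.avalue would return 234 for the 'A value' key above.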
+ """ + + def __init__(self, dict_object): + """Initialize the object and build a secondary dict.""" + # TODO: Move some of this code to a more value typed system. + self._dict_object = dict_object + + self._dict_translated = {} + for key, value in dict_object.items(): + self._dict_translated[self._StripKey(key)] = value + + def _StripKey(self, key): + """Return a stripped version of the dict key without symbols.""" + try: + return str(key).lower().translate(None, ' (){}+_=-<>[]') + except UnicodeEncodeError: + pass + + def __getattr__(self, attr): + """Return back entries from the dictionary.""" + if attr in self._dict_object: + return self._dict_object.get(attr) + + # Special case of getting all the key/value pairs. + if attr == '__all__': + ret = [] + for key, value in self._dict_translated.items(): + ret.append(u'{}:{}'.format(key, value)) + return u' '.join(ret) + + test = self._StripKey(attr) + if test in self._dict_translated: + return self._dict_translated.get(test) + + +class PlasoValueExpander(objectfilter.AttributeValueExpander): + """An expander that gives values based on object attribute names.""" + + def __init__(self): + """Initialize an attribue value expander.""" + super(PlasoValueExpander, self).__init__() + self._formatters_manager = formatters_manager.EventFormatterManager + + def _GetMessage(self, obj): + """Return a properly formatted message string.""" + ret = u'' + + try: + ret, _ = self._formatters_manager.GetMessageStrings(obj) + except KeyError as exception: + logging.warning(u'Unable to correctly assemble event: {0:s}'.format( + exception)) + + return ret + + def _GetSources(self, obj): + """Return a properly formatted source strings.""" + try: + source_short, source_long = self._formatters_manager.GetSourceStrings(obj) + except KeyError as exception: + logging.warning(u'Unable to correctly assemble event: {0:s}'.format( + exception)) + + return source_short, source_long + + def _GetValue(self, obj, attr_name): + ret = getattr(obj, attr_name, None) + + if ret: + if isinstance(ret, dict): + ret = DictObject(ret) + + if attr_name == 'tag': + return ret.tags + + return ret + + # Check if this is a message request and we have a regular EventObject. + if attr_name == 'message': + return self._GetMessage(obj) + + # Check if this is a source_short request. + if attr_name in ('source', 'source_short'): + source_short, _ = self._GetSources(obj) + return source_short + + # Check if this is a source_long request. + if attr_name in ('source_long', 'sourcetype'): + _, source_long = self._GetSources(obj) + return source_long + + def _GetAttributeName(self, path): + return path[0].lower() + + +class PlasoExpression(objectfilter.BasicExpression): + """A Plaso specific expression.""" + # A simple dictionary used to swap attributes so other names can be used + # to reference some core attributes (implementation specific). 
+  swap_source = {
+      'date': 'timestamp',
+      'datetime': 'timestamp',
+      'time': 'timestamp',
+      'description_long': 'message',
+      'description': 'message',
+      'description_short': 'message_short',
+  }
+
+  def Compile(self, filter_implementation):
+    self.attribute = self.swap_source.get(self.attribute, self.attribute)
+    arguments = [self.attribute]
+    op_str = self.operator.lower()
+    operator = filter_implementation.OPS.get(op_str, None)
+
+    if not operator:
+      raise objectfilter.ParseError(u'Unknown operator {0:s} provided.'.format(
+          self.operator))
+
+    # Plaso specific implementation - if we are comparing a timestamp
+    # to a value, we use our specific implementation that compares
+    # timestamps in a "human readable" format.
+    if self.attribute == 'timestamp':
+      args = []
+      for arg in self.args:
+        args.append(DateCompareObject(arg))
+      self.args = args
+
+    for arg in self.args:
+      if isinstance(arg, DateCompareObject):
+        if 'Less' in str(operator):
+          TimeRangeCache.SetUpperTimestamp(arg.data)
+        else:
+          TimeRangeCache.SetLowerTimestamp(arg.data)
+    arguments.extend(self.args)
+    expander = filter_implementation.FILTERS['ValueExpander']
+    ops = operator(arguments=arguments, value_expander=expander)
+    if not self.bool_value:
+      if hasattr(ops, 'FlipBool'):
+        ops.FlipBool()
+
+    return ops
+
+
+class ParserList(objectfilter.GenericBinaryOperator):
+  """Matches when a parser is inside a predefined list of parsers."""
+
+  def __init__(self, *children, **kwargs):
+    """Construct the parser list and retrieve a list of available parsers."""
+    super(ParserList, self).__init__(*children, **kwargs)
+    self.compiled_list = presets.categories.get(
+        self.right_operand.lower(), [])
+
+  def Operation(self, x, unused_y):
+    """Return a bool depending on whether the parser list contains the parser."""
+    if self.left_operand != 'parser':
+      raise objectfilter.MalformedQueryError(
+          u'Unable to use keyword "inlist" for other than parser.')
+
+    if x in self.compiled_list:
+      return True
+
+    return False
+
+
+class PlasoAttributeFilterImplementation(objectfilter.BaseFilterImplementation):
+  """Does field name access on the lowercase version of names.
+
+  Useful to access only attributes and properties that follow Google's
+  Python naming style.
+  """
+
+  FILTERS = {}
+  FILTERS.update(objectfilter.BaseFilterImplementation.FILTERS)
+  FILTERS.update({'ValueExpander': PlasoValueExpander})
+  OPS = objectfilter.OP2FN
+  OPS.update({'inlist': ParserList,})
+
+
+class DateCompareObject(object):
+  """A specific class created for date comparison.
+
+  This object takes a date representation, whether that is a direct integer,
+  a datetime object or a string representing the date, and uses that for
+  comparing against timestamps stored in microseconds since Jan 1, 1970
+  00:00:00 UTC.
+
+  This makes it possible to use regular comparison operators for dates,
+  irrespective of the format the date comes in. Since plaso stores all
+  timestamps in the same format, which is an integer/long, it is a simple
+  matter of changing the input into the same format (int) and comparing that.
+  """
+
+  def __init__(self, data):
+    """Take a date object and use that for comparison.
+
+    Args:
+      data: A string, datetime object or an integer that represents the
+            time to compare against. Time should be stored as microseconds
+            since Epoch (Jan 1, 1970 00:00:00 UTC).
+
+    Raises:
+      ValueError: if the date string is invalid.
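+      ValueError: if the type of the data is not supported.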
+ """ + self.text = utils.GetUnicodeString(data) + if type(data) in (int, long): + self.data = data + elif type(data) == float: + self.data = long(data) + elif type(data) in (str, unicode): + try: + self.data = timelib.Timestamp.FromTimeString( + utils.GetUnicodeString(data)) + except ValueError as exception: + raise ValueError(u'Wrongly formatted date string: {0:s} - {1:s}'.format( + data, exception)) + elif type(data) == datetime.datetime: + self.data = timelib.Timestamp.FromPythonDatetime(data) + elif isinstance(DateCompareObject, data): + self.data = data.data + else: + raise ValueError(u'Unsupported type: {0:s}.'.format(type(data))) + + def __cmp__(self, x): + """A simple comparison operation.""" + try: + x_date = DateCompareObject(x) + return cmp(self.data, x_date.data) + except ValueError: + return False + + def __le__(self, x): + """Less or equal comparison.""" + return self.data <= x + + def __ge__(self, x): + """Greater or equal comparison.""" + return self.data >= x + + def __eq__(self, x): + """Check if equal.""" + return x == self.data + + def __ne__(self, x): + """Check if not equal.""" + return x != self.data + + def __str__(self): + """Return a string representation of the object.""" + return self.text + + +class BaseParser(objectfilter.Parser): + """Plaso version of the Parser.""" + + expression_cls = PlasoExpression + + +class TrueObject(object): + """A simple object that always returns true for all comparison. + + This object is used for testing certain conditions inside filter queries. + By returning true for all comparisons this object can be used to evaluate + specific portions of a filter query. + """ + + def __init__(self, txt=''): + """Save the text object so it can be used when comparing text.""" + self.txt = txt + + def __getattr__(self, unused_attr): + """Return a TrueObject for every attribute request.""" + return self + + def __eq__(self, unused_x): + """Return true for tests of equality.""" + return True + + def __gt__(self, unused_x): + """Return true for checks for greater.""" + return True + + def __ge__(self, unused_x): + """Return true for checks for greater or equal.""" + return True + + def __lt__(self, unused_x): + """Return true for checks of less.""" + return True + + def __le__(self, unused_x): + """Return true for checks of less or equal.""" + return True + + def __ne__(self, unused_x): + """Return true for all not equal comparisons.""" + return True + + def __iter__(self): + """Return a generator so a test for the in keyword can be used.""" + yield self + + def __str__(self): + """Return a string to make regular expression searches possible. + + Returns: + A string that containes the original query with some of the matches + expanded, perhaps several times. + """ + # Regular expressions in pfilter may include the following escapes: + # "\\'\"rnbt\.ws": + txt = self.txt + if r'\.' in self.txt: + txt += self.txt.replace(r'\.', ' _ text _ ') + + if r'\b' in self.txt: + txt += self.txt.replace(r'\b', ' ') + + if r'\s' in self.txt: + txt += self.txt.replace(r'\s', ' ') + + return txt + + +class MockTestFilter(object): + """A mock test filter object used to test certain portion of test queries. + + The logic behind this object is that a single attribute can be isolated + for comparison. That is to say all calls to attributes will lead to a TRUE + response, except those attributes that are specifically stated in the + constructor. This way it is simple to test for instance whether or not + to include a parser at all, before actually running the tool. 
The same applies + to filtering out certain filenames, etc. + """ + + def __init__(self, query, **kwargs): + """Constructor, only valid attribute is the parser one.""" + self.attributes = kwargs + self.txt = query + + def __getattr__(self, attr): + """Return TrueObject for all requests except for stored attributes.""" + if attr in self.attributes: + return self.attributes.get(attr, None) + + # TODO: Either delete this entire object (MockTestFilter) or implement + # a false object and return the correct one depending on whether we + # are looking for a true or negative response (eg "not" keyword included). + return TrueObject(self.txt) + + +class TimeRangeCache(object): + """A class that stores timeranges from filters.""" + + @classmethod + def ResetTimeConstraints(cls): + """Resets the time constraints.""" + if hasattr(cls, '_lower'): + del cls._lower + if hasattr(cls, '_upper'): + del cls._upper + + @classmethod + def SetLowerTimestamp(cls, timestamp): + """Sets the lower bound timestamp.""" + if not hasattr(cls, '_lower'): + cls._lower = timestamp + return + + if timestamp < cls._lower: + cls._lower = timestamp + + @classmethod + def SetUpperTimestamp(cls, timestamp): + """Sets the upper bound timestamp.""" + if not hasattr(cls, '_upper'): + cls._upper = timestamp + return + + if timestamp > cls._upper: + cls._upper = timestamp + + @classmethod + def GetTimeRange(cls): + """Return the first and last timestamp of filter range.""" + first = getattr(cls, '_lower', 0) + last = getattr(cls, '_upper', limit.MAX_INT64) + + if first < last: + return first, last + else: + return last, first + + +def GetMatcher(query, quiet=False): + """Return a filter match object for a given query.""" + matcher = None + try: + parser = BaseParser(query).Parse() + matcher = parser.Compile(PlasoAttributeFilterImplementation) + except objectfilter.ParseError as exception: + if not quiet: + logging.error(u'Filter <{0:s}> malformed: {1:s}'.format( + query, exception)) + + return matcher diff --git a/plaso/lib/pfilter_test.py b/plaso/lib/pfilter_test.py new file mode 100644 index 0000000..0d83996 --- /dev/null +++ b/plaso/lib/pfilter_test.py @@ -0,0 +1,238 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
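As a quick illustration of how the module above is driven (an editorial sketch, not part of the patch): GetMatcher and the compiled matcher's Matches call exist in the code above, while FakeEvent and its attribute values are invented here.

    from plaso.lib import pfilter


    class FakeEvent(object):
      """Stand-in event carrying only the attributes the query touches."""

      def __init__(self):
        self.filename = u'/tmp/evil/malware.exe'
        self.parser = u'filestat'


    # GetMatcher compiles the query, or returns None and logs a parse error.
    matcher = pfilter.GetMatcher(
        u'filename contains \'evil\' AND parser is \'filestat\'')
    if matcher and matcher.Matches(FakeEvent()):
      print 'filter matched'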
+"""Tests for the filters.""" + +import unittest + +from plaso.formatters import interface as formatters_interface +from plaso.lib import event +from plaso.lib import objectfilter +from plaso.lib import pfilter +from plaso.lib import timelib_test +from plaso.parsers import interface as parsers_interface + +import pytz + + +class Empty(object): + """An empty object.""" + + +class PfilterFakeFormatter(formatters_interface.EventFormatter): + """A formatter for this fake class.""" + DATA_TYPE = 'Weirdo:Made up Source:Last Written' + + FORMAT_STRING = '{text}' + FORMAT_STRING_SHORT = '{text_short}' + + SOURCE_LONG = 'Fake Parsing Source' + SOURCE_SHORT = 'REG' + + +class PfilterFakeParser(parsers_interface.BaseParser): + """A fake parser that does not parse anything, but registers.""" + + NAME = 'pfilter_fake_parser' + + DATA_TYPE = 'Weirdo:Made up Source:Last Written' + + def Parse(self, unused_parser_context, unused_file_entry): + """Extract data from a fake plist file for testing. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + + Yields: + An event object (instance of EventObject) that contains the parsed + attributes. + """ + event_object = event.EventObject() + event_object.timestamp = timelib_test.CopyStringToTimestamp( + '2015-11-18 01:15:43') + event_object.timestamp_desc = 'Last Written' + event_object.text_short = 'This description is different than the long one.' + event_object.text = ( + u'User did a very bad thing, bad, bad thing that awoke Dr. Evil.') + event_object.filename = ( + u'/My Documents/goodfella/Documents/Hideout/myfile.txt') + event_object.hostname = 'Agrabah' + event_object.parser = 'Weirdo' + event_object.inode = 1245 + event_object.display_name = u'unknown:{0:s}'.format(event_object.filename) + event_object.data_type = self.DATA_TYPE + + yield event_object + + +class PfilterAnotherParser(PfilterFakeParser): + """Another fake parser that does nothing but register as a parser.""" + + NAME = 'pfilter_another_fake' + + DATA_TYPE = 'Weirdo:AnotherFakeSource' + + +class PfilterAnotherFakeFormatter(PfilterFakeFormatter): + """Formatter for the AnotherParser event.""" + + DATA_TYPE = 'Weirdo:AnotherFakeSource' + SOURCE_LONG = 'Another Fake Source' + + +class PfilterAllEvilParser(PfilterFakeParser): + """A class that does nothing but has a fancy name.""" + + NAME = 'pfilter_evil_fake_parser' + + DATA_TYPE = 'Weirdo:AllEvil' + + +class PfilterEvilFormatter(PfilterFakeFormatter): + """Formatter for the AllEvilParser.""" + + DATA_TYPE = 'Weirdo:AllEvil' + SOURCE_LONG = 'A Truly Evil' + + +class PFilterTest(unittest.TestCase): + """Simple plaso specific tests to the pfilter implementation.""" + + def setUp(self): + """Set up the necessary variables used in tests.""" + self._pre = Empty() + self._pre.zone = pytz.UTC + + def testPlasoEvents(self): + """Test plaso EventObjects, both Python and Protobuf version. + + These are more plaso specific tests than the more generic + objectfilter ones. It will create an EventObject that stores + some attributes. These objects will then be serialzed into an + EventObject protobuf and all tests run against both the native + Python object as well as the protobuf. 
+ """ + event_object = event.EventObject() + event_object.data_type = 'Weirdo:Made up Source:Last Written' + event_object.timestamp = timelib_test.CopyStringToTimestamp( + '2015-11-18 01:15:43') + event_object.timestamp_desc = 'Last Written' + event_object.text_short = 'This description is different than the long one.' + event_object.text = ( + u'User did a very bad thing, bad, bad thing that awoke Dr. Evil.') + event_object.filename = ( + u'/My Documents/goodfella/Documents/Hideout/myfile.txt') + event_object.hostname = 'Agrabah' + event_object.parser = 'Weirdo' + event_object.inode = 1245 + event_object.mydict = { + 'value': 134, 'another': 'value', 'A Key (with stuff)': 'Here'} + event_object.display_name = u'unknown:{0:s}'.format(event_object.filename) + + # Series of tests. + query = 'filename contains \'GoodFella\'' + self.RunPlasoTest(event_object, query, True) + + # Double negative matching -> should be the same + # as a positive one. + query = 'filename not not contains \'GoodFella\'' + my_parser = pfilter.BaseParser(query) + self.assertRaises( + objectfilter.ParseError, + my_parser.Parse) + + # Test date filtering. + query = 'date >= \'2015-11-18\'' + self.RunPlasoTest(event_object, query, True) + + query = 'date < \'2015-11-19\'' + self.RunPlasoTest(event_object, query, True) + + # 2015-11-18T01:15:43 + query = ( + 'date < \'2015-11-18T01:15:44.341\' and date > \'2015-11-18 01:15:42\'') + self.RunPlasoTest(event_object, query, True) + + query = 'date > \'2015-11-19\'' + self.RunPlasoTest(event_object, query, False) + + # Perform few attribute tests. + query = 'filename not contains \'sometext\'' + self.RunPlasoTest(event_object, query, True) + + query = ( + 'timestamp_desc CONTAINS \'written\' AND date > \'2015-11-18\' AND ' + 'date < \'2015-11-25 12:56:21\' AND (source_short contains \'LOG\' or ' + 'source_short CONTAINS \'REG\')') + self.RunPlasoTest(event_object, query, True) + + query = 'parser is not \'Made\'' + self.RunPlasoTest(event_object, query, True) + + query = 'parser is not \'Weirdo\'' + self.RunPlasoTest(event_object, query, False) + + query = 'mydict.value is 123' + self.RunPlasoTest(event_object, query, False) + + query = 'mydict.akeywithstuff contains "ere"' + self.RunPlasoTest(event_object, query, True) + + query = 'mydict.value is 134' + self.RunPlasoTest(event_object, query, True) + + query = 'mydict.value < 200' + self.RunPlasoTest(event_object, query, True) + + query = 'mydict.another contains "val"' + self.RunPlasoTest(event_object, query, True) + + query = 'mydict.notthere is 123' + self.RunPlasoTest(event_object, query, False) + + query = 'source_long not contains \'Fake\'' + self.RunPlasoTest(event_object, query, False) + + query = 'source is \'REG\'' + self.RunPlasoTest(event_object, query, True) + + query = 'source is not \'FILE\'' + self.RunPlasoTest(event_object, query, True) + + # Multiple attributes. 
+    query = (
+        'source_long is \'Fake Parsing Source\' AND description_long '
+        'regexp \'bad, bad thing [\\sa-zA-Z\\.]+ evil\'')
+    self.RunPlasoTest(event_object, query, False)
+
+    query = (
+        'source_long is \'Fake Parsing Source\' AND text iregexp '
+        '\'bad, bad thing [\\sa-zA-Z\\.]+ evil\'')
+    self.RunPlasoTest(event_object, query, True)
+
+  def RunPlasoTest(self, obj, query, result):
+    """Run a simple test against an event object."""
+    my_parser = pfilter.BaseParser(query).Parse()
+    matcher = my_parser.Compile(
+        pfilter.PlasoAttributeFilterImplementation)
+
+    self.assertEqual(result, matcher.Matches(obj))
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/plaso/lib/proxy.py b/plaso/lib/proxy.py
new file mode 100644
index 0000000..a40f034
--- /dev/null
+++ b/plaso/lib/proxy.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a proxy object that can be used to provide RPC access."""
+
+import abc
+
+
+def GetProxyPortNumberFromPID(process_id):
+  """Simple mechanism to set the port number based on a PID value.
+
+  Args:
+    process_id: An integer process ID (PID) value that should be used to
+                find a port number.
+
+  Returns:
+    An integer indicating a possible port number for the process to listen on.
+  """
+  # TODO: Improve this method of selecting ports.
+  # This is in no way a perfect algorithm for choosing port numbers (what if
+  # the port is already assigned? etc.). For example PID 80 maps to port 1104
+  # and PID 70000 wraps around to port 4465.
+  if process_id < 1024:
+    return process_id + 1024
+
+  if process_id > 65535:
+    # Return the remainder after dividing by the highest port number, sent
+    # back into the function itself, since the result could be lower than
+    # 1024.
+    return GetProxyPortNumberFromPID(process_id % 65535)
+
+  return process_id
+
+
+class ProxyServer(object):
+  """An interface defining functions needed for a proxy object."""
+
+  def __init__(self, port=0):
+    """Initialize the proxy object.
+
+    Args:
+      port: An integer indicating the port number the proxy listens to.
+            This is optional and defaults to port zero.
+    """
+    super(ProxyServer, self).__init__()
+    self._port_number = port
+
+  def __enter__(self):
+    """Make usable with "with" statement."""
+    return self
+
+  def __exit__(self, unused_type, unused_value, unused_traceback):
+    """Make usable with "with" statement."""
+    self.Close()
+
+  @property
+  def listening_port(self):
+    """Return the port the proxy listens to."""
+    return self._port_number
+
+  @abc.abstractmethod
+  def Close(self):
+    """Close the proxy server."""
+
+  @abc.abstractmethod
+  def Open(self):
+    """Sets up the necessary objects in order for the proxy to be started."""
+
+  @abc.abstractmethod
+  def RegisterFunction(self, function_name, function):
+    """Register a function for this proxy.
+
+    Args:
+      function_name: The name of the registered proxy function.
+      function: The callback for the function providing the answer.
+ """ + + @abc.abstractmethod + def StartProxy(self): + """Start the proxy. + + This usually involves setting up the proxy to bind to an address and + listen to requests. + """ + + @abc.abstractmethod + def SetListeningPort(self, new_port_number): + """Change the port the proxy listens to.""" + + +class ProxyClient(object): + """An interface defining functions needed to implement a proxy client.""" + + def __init__(self, port=0): + """Initialize the proxy client. + + Args: + port: An integer indicating the port number the proxy connects to. + This is optional and defaults to port zero. + """ + super(ProxyClient, self).__init__() + self._port_number = port + + @abc.abstractmethod + def Open(self): + """Sets up the necessary objects in order for the proxy to be started.""" + + @abc.abstractmethod + def GetData(self, call_back_name): + """Return data extracted from a RPC callback. + + Args: + call_back_name: The name of the call back function or attribute registered + in the RPC service. + + Returns: + The data returned by the RPC server. + """ diff --git a/plaso/lib/putils.py b/plaso/lib/putils.py new file mode 100644 index 0000000..618f30d --- /dev/null +++ b/plaso/lib/putils.py @@ -0,0 +1,58 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains few methods for Plaso.""" + +import logging + +from plaso.lib import output + + +# TODO: Refactor the putils library so it does not end up being a trash can +# for all things core/front-end. We don't want this to be end up being a +# collection for all methods that have no other home. +class Options(object): + """A simple configuration object.""" + + +def _FindClasses(class_object, *args): + """Find all registered classes. + + A method to find all registered classes of a particular + class. + + Args: + class_object: The parent class. + + Returns: + A list of registered classes of that class. + """ + results = [] + for cls in class_object.classes: + try: + results.append(class_object.classes[cls](*args)) + except Exception: + logging.error( + u'_FindClasses: exception while appending: {0:s}'.format(cls)) + raise + + return results + + +def FindAllOutputs(): + """Find all available output modules.""" + return _FindClasses(output.LogOutputFormatter, None) diff --git a/plaso/lib/registry.py b/plaso/lib/registry.py new file mode 100644 index 0000000..0e60b85 --- /dev/null +++ b/plaso/lib/registry.py @@ -0,0 +1,80 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a class registration system for plugins.""" + +import abc + + +class MetaclassRegistry(abc.ABCMeta): + """Automatic Plugin Registration through metaclasses.""" + + def __init__(cls, name, bases, env_dict): + """Initialize a metaclass. + + Args: + name: The interface class name. + bases: A tuple of base names. + env_dict: The namespace of the object. + + Raises: + KeyError: If a classes given name is already registered, to make sure + no two classes that inherit from the same interface can have + the same name attribute. + """ + abc.ABCMeta.__init__(cls, name, bases, env_dict) + + # Register the name of the immediate parent class. + if bases: + cls.parent_class_name = getattr(bases[0], 'NAME', bases[0]) + cls.parent_class = bases[0] + + # Attach the classes dict to the baseclass and have all derived classes + # use the same one: + for base in bases: + try: + cls.classes = base.classes + cls.plugin_feature = base.plugin_feature + cls.top_level_class = base.top_level_class + break + except AttributeError: + cls.classes = {} + cls.plugin_feature = cls.__name__ + # Keep a reference to the top level class + cls.top_level_class = cls + + # The following should not be registered as they are abstract. Classes + # are abstract if the have the __abstract attribute (note this is not + # inheritable so each abstract class must be explicitely marked). + abstract_attribute = '_{0:s}__abstract'.format(name) + if getattr(cls, abstract_attribute, None): + return + + if not cls.__name__.startswith('Abstract'): + cls_name = getattr(cls, 'NAME', cls.__name__) + + if cls_name in cls.classes: + raise KeyError(u'Class: {0:s} already registered. [{1:s}]'.format( + cls_name, repr(cls))) + + cls.classes[cls_name] = cls + + try: + if cls.top_level_class.include_plugins_as_attributes: + setattr(cls.top_level_class, cls.__name__, cls) + except AttributeError: + pass diff --git a/plaso/lib/storage.py b/plaso/lib/storage.py new file mode 100644 index 0000000..141f647 --- /dev/null +++ b/plaso/lib/storage.py @@ -0,0 +1,1564 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The storage mechanism. + +The storage mechanism can be described as a collection of storage files +that are stored together in a single ZIP compressed container. + +The storage file is essentially split up in two categories: + + A store file (further described below). 
+ + Other files, these contain grouping information, tag, collection + information or other metadata describing the content of the store files. + +The store itself is a collection of four files: + plaso_meta. + plaso_proto. + plaso_index. + plaso_timestamps. + +The plaso_proto file within each store contains several serialized EventObjects +or events that are serialized (as a protobuf). All of the EventObjects within +the plaso_proto file are fully sorted based on time however since the storage +container can contain more than one store the overall storage is not fully +sorted. + +The other files that make up the store are: + + + plaso_meta + +Simple text file using YAML for storing metadata information about the store. +definition, example: + variable: value + a_list: [value, value, value] + +This can be used to filter out which proto files should be included +in processing. + + + plaso_index + +The index file contains an index to all the entries stored within +the protobuf file, so that it can be easily seeked. The layout is: + ++-----+-----+-...+ +| int | int | ...| ++-----+-----+-...+ + +Where int is an unsigned integer '= self._file_number: + self._file_number = file_number + 1 + except ValueError: + # Ignore invalid metadata stream names. + pass + + self._first_file_number = self._file_number + + def __enter__(self): + """Make usable with "with" statement.""" + return self + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make usable with "with" statement.""" + self.Close() + + def _BuildTagIndex(self): + """Builds the tag index that contains the offsets for each tag. + + Raises: + IOError: if the stream cannot be opened. + """ + self._event_tag_index = {} + + for stream_name in self._GetStreamNames(): + if not stream_name.startswith('plaso_tag_index.'): + continue + + file_object = self._OpenStream(stream_name, 'r') + if file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + _, _, store_number = stream_name.rpartition('.') + # TODO: catch exception. + store_number = int(store_number, 10) + + while True: + tag_index_value = _EventTagIndexValue.Read( + file_object, store_number) + if tag_index_value is None: + break + + self._event_tag_index[tag_index_value.identifier] = tag_index_value + + def _FlushBuffer(self): + """Flushes the buffered streams to disk.""" + if not self._buffer_size: + return + + yaml_dict = { + 'range': (self._buffer_first_timestamp, self._buffer_last_timestamp), + 'version': self.STORAGE_VERSION, + 'data_type': list(self._count_data_type.viewkeys()), + 'parsers': list(self._count_parser.viewkeys()), + 'count': len(self._buffer), + 'type_count': self._count_data_type.most_common()} + self._count_data_type = collections.Counter() + self._count_parser = collections.Counter() + + stream_name = 'plaso_meta.{0:06d}'.format(self._file_number) + self._WriteStream(stream_name, yaml.safe_dump(yaml_dict)) + + ofs = 0 + proto_str = [] + index_str = [] + timestamp_str = [] + for _ in range(len(self._buffer)): + timestamp, entry = heapq.heappop(self._buffer) + # TODO: Instead of appending to an array + # which is not optimal (loads up the entire max file + # size into memory) Zipfile should be extended to + # allow appending to files (implement lock). + try: + # Appending a timestamp to the timestamp index, this is used during + # time based filtering. If this is not done we would need to unserialize + # all events to get the timestamp value which is really slow. 
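+        # Each index entry is a fixed size 8 byte integer, so the n-th
+        # timestamp lives at byte offset n * 8 of the plaso_timestamps
+        # stream.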
+ timestamp_str.append(struct.pack('= 0: + stream_offset = self._GetProtoStreamOffset(stream_number, entry_index) + if stream_offset is None: + logging.error(( + u'Unable to read entry index: {0:d} from proto stream: ' + u'{1:d}').format(entry_index, stream_number)) + + return None, None + + file_object, last_entry_index = self._GetProtoStreamSeekOffset( + stream_number, entry_index, stream_offset) + + if (not last_entry_index and entry_index == -1 and + self._bound_first is not None): + # We only get here if the following conditions are met: + # 1. last_entry_index is not set (so this is the first read + # from this file). + # 2. There is a lower bound (so we have a date filter). + # 3. The lower bound is higher than zero (basically set to a value). + # 4. We are accessing this function using 'get me the next entry' as an + # opposed to the 'get me entry X', where we just want to server entry + # X. + # + # The purpose: speed seeking into the storage file based on time. Instead + # of spending precious time reading through the storage file and + # deserializing protobufs just to compare timestamps we read a much + # 'cheaper' file, one that only contains timestamps to find the proper + # entry into the storage file. That way we'll get to the right place in + # the file and can start reading protobufs from the right location. + + stream_name = 'plaso_timestamps.{0:06d}'.format(stream_number) + + if stream_name in self._GetStreamNames(): + timestamp_file_object = self._OpenStream(stream_name, 'r') + if timestamp_file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + index = 0 + timestamp_compare = 0 + encountered_error = False + while timestamp_compare < self._bound_first: + timestamp_raw = timestamp_file_object.read(8) + if len(timestamp_raw) != 8: + encountered_error = True + break + + timestamp_compare = struct.unpack(' self.MAX_PROTO_STRING_SIZE: + raise errors.WrongProtobufEntry( + u'Protobuf string size value exceeds maximum: {0:d}'.format( + proto_string_size)) + + event_object_data = file_object.read(proto_string_size) + self._proto_streams[stream_number] = (file_object, last_entry_index + 1) + + return event_object_data, last_entry_index + + def _GetEventGroupProto(self, file_object): + """Return a single group entry.""" + unpacked = file_object.read(4) + if len(unpacked) != 4: + return None + + size = struct.unpack(' StorageFile.MAX_PROTO_STRING_SIZE: + raise errors.WrongProtobufEntry( + u'Protobuf size too large: {0:d}'.format(size)) + + proto_serialized = file_object.read(size) + proto = plaso_storage_pb2.EventGroup() + + proto.ParseFromString(proto_serialized) + return proto + + def _GetProtoStream(self, stream_number): + """Retrieves the proto stream. + + Args: + stream_number: the number of the stream. + + Returns: + A tuple of the stream file-like object and the last entry index to + which the offset of the stream file-like object points. + + Raises: + IOError: if the stream cannot be opened. + """ + if stream_number not in self._proto_streams: + stream_name = 'plaso_proto.{0:06d}'.format(stream_number) + + file_object = self._OpenStream(stream_name, 'r') + if file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + # TODO: change this to a value object and track the stream offset as well. + # This allows to reduce the number of re-opens when the seek offset is + # beyond the current offset. 
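+    # Note that seeking backwards currently means closing the stream,
+    # re-opening it and reading up to the requested offset again, so callers
+    # should read entries in ascending order whenever possible.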
+ self._proto_streams[stream_number] = (file_object, 0) + + return self._proto_streams[stream_number] + + def _GetProtoStreamSeekOffset( + self, stream_number, entry_index, stream_offset): + """Retrieves the proto stream and seeks a specified offset in the stream. + + Args: + stream_number: the number of the stream. + entry_index: the entry index. + stream_offset: the offset relative to the start of the stream. + + Returns: + A tuple of the stream file-like object and the last index. + + Raises: + IOError: if the stream cannot be opened. + """ + # Since zipfile.ZipExtFile is not seekable we need to close the stream + # and reopen it to fake a seek. + if stream_number in self._proto_streams: + previous_file_object, _ = self._proto_streams[stream_number] + del self._proto_streams[stream_number] + previous_file_object.close() + + stream_name = 'plaso_proto.{0:06d}'.format(stream_number) + file_object = self._OpenStream(stream_name, 'r') + if file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + # Since zipfile.ZipExtFile is not seekable we need to read upto + # the stream offset. + _ = file_object.read(stream_offset) + + self._proto_streams[stream_number] = (file_object, entry_index) + + return self._proto_streams[stream_number] + + def _GetProtoStreamOffset(self, stream_number, entry_index): + """Retrieves the offset of a proto stream entry from the index stream. + + Args: + stream_number: the number of the stream. + entry_index: the entry index. + + Returns: + The offset of the entry in the corresponding proto stream + or None on error. + + Raises: + IOError: if the stream cannot be opened. + """ + # TODO: cache the index file object in the same way as the proto + # stream file objects. + + # TODO: once cached use the last entry index to determine if the stream + # file object should be re-opened. + + stream_name = 'plaso_index.{0:06d}'.format(stream_number) + index_file_object = self._OpenStream(stream_name, 'r') + if index_file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + # Since zipfile.ZipExtFile is not seekable we need to read upto + # the stream offset. + _ = index_file_object.read(entry_index * 4) + + index_data = index_file_object.read(4) + + index_file_object.close() + + if len(index_data) != 4: + return None + + return struct.unpack(' self.MAX_PROTO_STRING_SIZE: + raise errors.WrongProtobufEntry( + u'Protobuf string size value exceeds maximum: {0:d}'.format( + proto_string_size)) + + proto_string = file_object.read(proto_string_size) + return self._event_tag_serializer.ReadSerialized(proto_string) + + def _ReadEventTagByIdentifier(self, store_number, store_index, uuid): + """Reads an event tag by identifier. + + Args: + store_number: the store number. + store_index: the store index. + uuid: the UUID string. + + Returns: + The event tag (instance of EventTag). + + Raises: + IOError: if the stream cannot be opened. + """ + tag_index_value = self._GetEventTagIndexValue( + store_number, store_index, uuid) + if tag_index_value is None: + return + + stream_name = 'plaso_tagging.{0:06d}'.format(tag_index_value.store_number) + tag_file_object = self._OpenStream(stream_name, 'r') + if tag_file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + # Since zipfile.ZipExtFile is not seekable we need to read upto + # the store offset. 
+ _ = tag_file_object.read(tag_index_value.store_offset) + return self._ReadEventTag(tag_file_object) + + def _ReadStream(self, stream_name): + """Reads the data in a stream. + + Args: + stream_name: the name of the stream. + + Returns: + A byte string containing the data of the stream. + """ + data_segments = [] + file_object = self._OpenStream(stream_name, 'r') + + # zipfile.ZipExtFile does not support the with-statement interface. + if file_object: + data = file_object.read(self._STREAM_DATA_SEGMENT_SIZE) + while data: + data_segments.append(data) + data = file_object.read(self._STREAM_DATA_SEGMENT_SIZE) + + file_object.close() + + return ''.join(data_segments) + + def _SetEventObjectSerializer(self, serializer_string): + """Set the serializer for the event object.""" + if serializer_string == 'json': + self._event_object_serializer = ( + json_serializer.JsonEventObjectSerializer) + self._event_serializer_format_string = 'json' + else: + self._event_object_serializer = ( + protobuf_serializer.ProtobufEventObjectSerializer) + self._event_serializer_format_string = 'proto' + + def _WritePreprocessObject(self, pre_obj): + """Writes a preprocess object to the storage file. + + Args: + pre_obj: the preprocess object (instance of PreprocessObject). + + Raises: + IOError: if the stream cannot be opened. + """ + existing_stream_data = self._ReadStream('information.dump') + + # Store information about store range for this particular + # preprocessing object. This will determine which stores + # this information is applicaple for. + stores = list(self.GetProtoNumbers()) + if stores: + end = stores[-1] + 1 + else: + end = self._first_file_number + pre_obj.store_range = (self._first_file_number, end) + + pre_obj_data = self._pre_obj_serializer.WriteSerialized(pre_obj) + + stream_data = ''.join([ + existing_stream_data, + struct.pack(' self.MAX_PROTO_STRING_SIZE: + raise errors.WrongProtobufEntry( + u'Protobuf size too large: {0:d}'.format(size)) + + serialized_pre_obj = file_object.read(size) + try: + info = self._pre_obj_serializer.ReadSerialized(serialized_pre_obj) + except message.DecodeError: + logging.error(u'Unable to parse preprocessing object, bailing out.') + break + + information.append(info) + + stores = list(self.GetProtoNumbers()) + information[-1].stores = {} + information[-1].stores['Number'] = len(stores) + for store_number in stores: + store_identifier = 'Store {0:d}'.format(store_number) + information[-1].stores[store_identifier] = self.ReadMeta(store_number) + + return information + + def SetStoreLimit(self, unused_my_filter=None): + """Set a limit to the stores used for returning data.""" + # Retrieve set first and last timestamps. + self._bound_first, self._bound_last = pfilter.TimeRangeCache.GetTimeRange() + + self.store_range = [] + + # TODO: Fetch a filter object from the filter query. + + for number in self.GetProtoNumbers(): + # TODO: Read more criteria from here. + first, last = self.ReadMeta(number).get('range', (0, limit.MAX_INT64)) + if last < first: + logging.error( + u'last: {0:d} first: {1:d} container: {2:d} (last < first)'.format( + last, first, number)) + + if first <= self._bound_last and self._bound_first <= last: + # TODO: Check at least parser and data_type (stored in metadata). + # Check whether these attributes exist in filter, if so use the filter + # to determine whether the stores should be included. 
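+        # Until that is implemented, every store whose time range overlaps
+        # the filter bounds is included.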
+ self.store_range.append(number) + + else: + logging.debug(u'Store [{0:d}] not used'.format(number)) + + def GetSortedEntry(self): + """Return a sorted entry from the storage file. + + Returns: + An event object (instance of EventObject). + """ + if self._bound_first is None: + self._bound_first, self._bound_last = ( + pfilter.TimeRangeCache.GetTimeRange()) + + if not hasattr(self, '_merge_buffer'): + self._merge_buffer = [] + number_range = getattr(self, 'store_range', list(self.GetProtoNumbers())) + for store_number in number_range: + event_object = self.GetEventObject(store_number) + if not event_object: + return + + while event_object.timestamp < self._bound_first: + event_object = self.GetEventObject(store_number) + if not event_object: + return + + heapq.heappush( + self._merge_buffer, + (event_object.timestamp, store_number, event_object)) + + if not self._merge_buffer: + return + + _, store_number, event_read = heapq.heappop(self._merge_buffer) + if not event_read: + return + + # Stop as soon as we hit the upper bound. + if event_read.timestamp > self._bound_last: + return + + new_event_object = self.GetEventObject(store_number) + + if new_event_object: + heapq.heappush( + self._merge_buffer, + (new_event_object.timestamp, store_number, new_event_object)) + + event_read.tag = self._ReadEventTagByIdentifier( + event_read.store_number, event_read.store_index, event_read.uuid) + + return event_read + + def GetEventObject(self, stream_number, entry_index=-1): + """Reads an event object from the store. + + By default the next entry in the appropriate proto file is read + and returned, however any entry can be read using the index file. + + Args: + stream_number: The proto stream number. + entry_index: Read a specific entry in the file. The default is -1, + which represents the next available entry. + + Returns: + An event object (instance of EventObject) entry read from the file or + None if not able to read in a new event. + """ + event_object_data, entry_index = self._GetEventObjectProtobufString( + stream_number, entry_index=entry_index) + if not event_object_data: + return + + event_object = self._event_object_serializer.ReadSerialized( + event_object_data) + event_object.store_number = stream_number + event_object.store_index = entry_index + + return event_object + + def GetEntries(self, number): + """A generator to read all plaso_storage protobufs. + + The storage mechanism of Plaso works in the way that it creates potentially + several files inside the ZIP container. As soon as the number of protobufs + stored exceed the size of buffer_size they will be flushed to disk as: + + plaso_proto.XXX + + Where XXX is an increasing integer, starting from one. To get all the files + or the numbers that are available this class implements a method called + GetProtoNumbers() that returns a list of all available protobuf files within + the container. + + This method returns a generator that returns all plaso_storage protobufs in + the named container, as indicated by the number argument. So if this method + is called as storage_object.GetEntries(1) the generator will return the + entries found in the file plaso_proto.000001. + + Args: + number: The protofile number. + + Yields: + A protobuf object from the protobuf file. + """ + # TODO: Change this function, don't accept a store number and implement the + # MergeSort functionality of the psort file in here. 
This will then always + # return the sorted entries from the storage file, implementing the second + # stage of the sort/merge algorithm. + while True: + try: + proto = self.GetEventObject(number) + if not proto: + logging.debug( + u'End of protobuf file plaso_proto.{0:06d} reached.'.format( + number)) + break + yield proto + except errors.WrongProtobufEntry as exception: + logging.warning(( + u'Problem while parsing a protobuf entry from: ' + u'plaso_proto.{0:06d} with error: {1:s}').format(number, exception)) + + def GetProtoNumbers(self): + """Return all available protobuf numbers.""" + numbers = [] + for name in self._GetStreamNames(): + if 'plaso_proto' in name: + _, num = name.split('.') + numbers.append(int(num)) + + for number in sorted(numbers): + yield number + + def ReadMeta(self, number): + """Return a dict with the metadata entries. + + Args: + number: The number of the metadata file (name is plaso_meta_XXX where + XXX is this number. + + Returns: + A dict object containing all the variables inside the metadata file. + + Raises: + IOError: if the stream cannot be opened. + """ + stream_name = 'plaso_meta.{0:06d}'.format(number) + file_object = self._OpenStream(stream_name, 'r') + if file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + return yaml.safe_load(file_object) + + def GetBufferSize(self): + """Return the size of the buffer.""" + return self._buffer_size + + def GetFileNumber(self): + """Return the current file number of the storage.""" + return self._file_number + + def AddEventObject(self, event_object): + """Adds an event object to the storage. + + Args: + event_object: an event object (instance of EventObject). + + Raises: + IOError: When trying to write to a closed storage file. + """ + if not self._file_open: + raise IOError(u'Trying to add an entry to a closed storage file.') + + if event_object.timestamp > self._buffer_last_timestamp: + self._buffer_last_timestamp = event_object.timestamp + + # TODO: support negative timestamps. + if (event_object.timestamp < self._buffer_first_timestamp and + event_object.timestamp > 0): + self._buffer_first_timestamp = event_object.timestamp + + attributes = event_object.GetValues() + # Add values to counters. + if self._pre_obj: + self._pre_obj.counter['total'] += 1 + self._pre_obj.counter[attributes.get('parser', 'N/A')] += 1 + if 'plugin' in attributes: + self._pre_obj.plugin_counter[attributes.get('plugin', 'N/A')] += 1 + + # Add to temporary counter. + self._count_data_type[event_object.data_type] += 1 + parser = attributes.get('parser', 'unknown_parser') + self._count_parser[parser] += 1 + + event_object_data = self._event_object_serializer.WriteSerialized( + event_object) + + # TODO: Re-think this approach with the re-design of the storage. + # Check if the event object failed to serialize (none is returned). + if event_object_data is None: + return + + heapq.heappush( + self._buffer, (event_object.timestamp, event_object_data)) + self._buffer_size += len(event_object_data) + self._write_counter += 1 + + if self._buffer_size > self._max_buffer_size: + self._FlushBuffer() + + def AddEventObjects(self, event_objects): + """Adds an event objects to the storage. + + Args: + event_objects: a list or generator of event objects (instances of + EventObject). + """ + for event_object in event_objects: + self.AddEventObject(event_object) + + def HasTagging(self): + """Return a bool indicating whether or not a Tag file is stored.""" + for name in self._GetStreamNames(): + if 'plaso_tagging.' 
in name: + return True + return False + + def HasGrouping(self): + """Return a bool indicating whether or not a Group file is stored.""" + for name in self._GetStreamNames(): + if 'plaso_grouping.' in name: + return True + return False + + def HasReports(self): + """Return a bool indicating whether or not a Report file is stored.""" + for name in self._GetStreamNames(): + if 'plaso_report.' in name: + return True + + return False + + def StoreReport(self, analysis_report): + """Store an analysis report. + + Args: + analysis_report: An analysis report object (instance of AnalysisReport). + """ + report_number = 1 + for name in self._GetStreamNames(): + if 'plaso_report.' in name: + _, _, number_string = name.partition('.') + try: + number = int(number_string, 10) + except ValueError: + logging.error(u'Unable to read in report number.') + number = 0 + if number >= report_number: + report_number = number + 1 + + stream_name = 'plaso_report.{0:06}'.format(report_number) + serialized_report_proto = self._analysis_report_serializer.WriteSerialized( + analysis_report) + self._WriteStream(stream_name, serialized_report_proto) + + def GetReports(self): + """Read in all stored analysis reports from storage and yield them. + + Raises: + IOError: if the stream cannot be opened. + """ + for stream_name in self._GetStreamNames(): + if stream_name.startswith('plaso_report.'): + file_object = self._OpenStream(stream_name, 'r') + if file_object is None: + raise IOError(u'Unable to open stream: {0:s}'.format(stream_name)) + + report_string = file_object.read(self.MAX_REPORT_PROTOBUF_SIZE) + yield self._analysis_report_serializer.ReadSerialized(report_string) + + def StoreGrouping(self, rows): + """Store group information into the storage file. + + An EventGroup protobuf stores information about several + EventObjects that belong to the same behavior or action. It can then + be used to group similar events together to create a super event, or + a higher level event. + + This function is used to store that information inside the storage + file so it can be read later. + + The object that is passed in needs to have an iterator implemented + and has to implement the following attributes (optional names within + bracket): + name - The name of the grouped event. + [description] - More detailed description of the event. + [category] - If this group of events falls into a specific category. + [color] - To highlight this particular group with a HTML color tag. + [first_timestamp] - The first timestamp if applicable of the group. + [last_timestamp] - The last timestamp if applicable of the group. + events - A list of tuples (store_number and store_index of the + EventObject protobuf that belongs to this group of events). + + Args: + rows: An object that contains the necessary fields to construct + an EventGroup. Has to be a generator object or an object that implements + an iterator. + """ + group_number = 1 + if self.HasGrouping(): + for name in self._GetStreamNames(): + if 'plaso_grouping.' 
in name:
+          _, number = name.split('.')
+          if int(number) >= group_number:
+            group_number = int(number) + 1
+
+    group_packed = []
+    size = 0
+    for row in rows:
+      group = plaso_storage_pb2.EventGroup()
+      group.name = row.name
+      if hasattr(row, 'description'):
+        group.description = utils.GetUnicodeString(row.description)
+      if hasattr(row, 'category'):
+        group.category = utils.GetUnicodeString(row.category)
+      if hasattr(row, 'color'):
+        group.color = utils.GetUnicodeString(row.color)
+
+      for number, index in row.events:
+        evt = group.events.add()
+        evt.store_number = int(number)
+        evt.store_index = int(index)
+
+      if hasattr(row, 'first_timestamp'):
+        group.first_timestamp = int(row.first_timestamp)
+      if hasattr(row, 'last_timestamp'):
+        group.last_timestamp = int(row.last_timestamp)
+
+      # TODO: implement event grouping.
+      group_str = group.SerializeToString()
+      packed = struct.pack('<I', len(group_str)) + group_str
+      size += len(packed)
+      if size > self._max_buffer_size:
+        logging.warning(u'Grouping has outgrown buffer size.')
+      group_packed.append(packed)
+
+    stream_name = 'plaso_grouping.{0:06d}'.format(group_number)
+    self._WriteStream(stream_name, ''.join(group_packed))
+
+  def StoreTagging(self, tags):
+    """Store tag information into the storage file.
+
+    Each EventObject can be tagged either manually or automatically
+    to make analysis simpler, by providing more context to certain
+    events or to highlight events for later viewing.
+
+    The object passed in needs to be a list (or otherwise an iterator)
+    that contains EventTag objects (event.EventTag).
+
+    Args:
+      tags: A list or an object providing an iterator that contains
+            EventTag objects.
+
+    Raises:
+      IOError: if the stream cannot be opened.
+    """
+    if not self._pre_obj:
+      self._pre_obj = event.PreprocessObject()
+
+    if not hasattr(self._pre_obj, 'collection_information'):
+      self._pre_obj.collection_information = {}
+
+    self._pre_obj.collection_information['Action'] = 'Adding tags to storage.'
+    self._pre_obj.collection_information['time_of_run'] = (
+        timelib.Timestamp.GetNow())
+    if not hasattr(self._pre_obj, 'counter'):
+      self._pre_obj.counter = collections.Counter()
+
+    tag_number = 1
+    for name in self._GetStreamNames():
+      if 'plaso_tagging.' in name:
+        _, number = name.split('.')
+        if int(number) >= tag_number:
+          tag_number = int(number) + 1
+        if self._event_tag_index is None:
+          self._BuildTagIndex()
+
+    tag_packed = []
+    tag_index = []
+    size = 0
+    for tag in tags:
+      self._pre_obj.counter['Total Tags'] += 1
+      if hasattr(tag, 'tags'):
+        for tag_entry in tag.tags:
+          self._pre_obj.counter[tag_entry] += 1
+
+      if self._event_tag_index is not None:
+        tag_index_value = self._event_tag_index.get(tag.string_key, None)
+      else:
+        tag_index_value = None
+
+      # This particular event has already been tagged on a previous occasion,
+      # so we need to make sure we are appending to that particular tag.
+      if tag_index_value is not None:
+        stream_name = 'plaso_tagging.{0:06d}'.format(
+            tag_index_value.store_number)
+
+        tag_file_object = self._OpenStream(stream_name, 'r')
+        if tag_file_object is None:
+          raise IOError(u'Unable to open stream: {0:s}'.format(stream_name))
+
+        # Since zipfile.ZipExtFile is not seekable we need to read up to
+        # the store offset.
+        _ = tag_file_object.read(tag_index_value.store_offset)
+
+        old_tag = self._ReadEventTag(tag_file_object)
+
+        # TODO: move the append functionality into EventTag.
+        # Maybe name the function extend or update?
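
An aside on the record framing used by these streams: StoreGrouping above appears to pack each serialized protobuf behind a 32-bit little-endian length prefix (the '<I' format string is a reconstruction, the original was mangled on import). Under that assumption, reading such a stream back sequentially looks like the sketch below; ReadRecords is a hypothetical helper, not part of plaso.

    import io
    import struct

    def ReadRecords(file_object):
      # Yields the payload of each record, assuming '<I' length-prefix framing.
      while True:
        prefix = file_object.read(4)
        if len(prefix) < 4:
          return
        payload_size, = struct.unpack('<I', prefix)
        yield file_object.read(payload_size)

    # Build a stream of two records the same way StoreGrouping packs them.
    records = [b'first record', b'second record']
    data = b''.join(struct.pack('<I', len(r)) + r for r in records)
    assert list(ReadRecords(io.BytesIO(data))) == records
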
+ if hasattr(old_tag, 'tags'): + tag.tags.extend(old_tag.tags) + + if hasattr(old_tag, 'comment'): + if hasattr(tag, 'comment'): + tag.comment += old_tag.comment + else: + tag.comment = old_tag.comment + + if hasattr(old_tag, 'color') and not hasattr(tag, 'color'): + tag.color = old_tag.color + + serialized_event_tag = self._event_tag_serializer.WriteSerialized(tag) + + # TODO: move to write class function of _EventTagIndexValue. + packed = ( + struct.pack(' cls.TIMESTAMP_MAX_SECONDS): + return 0 + + return cls.FromPosixTime(int(posix_time)) + + @classmethod + def FromFatDateTime(cls, fat_date_time): + """Converts a FAT date and time into a timestamp. + + FAT date time is mainly used in DOS/Windows file formats and FAT. + + The FAT date and time is a 32-bit value containing two 16-bit values: + * The date (lower 16-bit). + * bits 0 - 4: day of month, where 1 represents the first day + * bits 5 - 8: month of year, where 1 represent January + * bits 9 - 15: year since 1980 + * The time of day (upper 16-bit). + * bits 0 - 4: seconds (in 2 second intervals) + * bits 5 - 10: minutes + * bits 11 - 15: hours + + Args: + fat_date_time: The 32-bit FAT date time. + + Returns: + An integer containing the timestamp or 0 on error. + """ + number_of_seconds = cls.FAT_DATE_TO_POSIX_BASE + + day_of_month = (fat_date_time & 0x1f) - 1 + month = ((fat_date_time >> 5) & 0x0f) - 1 + year = (fat_date_time >> 9) & 0x7f + + if day_of_month < 0 or day_of_month > 30 or month < 0 or month > 11: + return 0 + + number_of_days = cls.DayOfYear(day_of_month, month, 1980 + year) + for past_year in range(0, year): + number_of_days += cls.DaysInYear(past_year) + + fat_date_time >>= 16 + + seconds = (fat_date_time & 0x1f) * 2 + minutes = (fat_date_time >> 5) & 0x3f + hours = (fat_date_time >> 11) & 0x1f + + if hours > 23 or minutes > 59 or seconds > 59: + return 0 + + number_of_seconds += (((hours * 60) + minutes) * 60) + seconds + + number_of_seconds += number_of_days * cls.SECONDS_PER_DAY + + return number_of_seconds * cls.MICRO_SECONDS_PER_SECOND + + @classmethod + def FromFiletime(cls, filetime): + """Converts a FILETIME into a timestamp. + + FILETIME is mainly used in Windows file formats and NTFS. + + The FILETIME is a 64-bit value containing: + 100th nano seconds since 1601-01-01 00:00:00 + + Technically FILETIME consists of 2 x 32-bit parts and is presumed + to be unsigned. + + Args: + filetime: The 64-bit FILETIME timestamp. + + Returns: + An integer containing the timestamp or 0 on error. + """ + # TODO: Add a handling for if the timestamp equals to zero. + if filetime < 0: + return 0 + timestamp = (filetime - cls.FILETIME_TO_POSIX_BASE) / 10 + + if timestamp > cls.TIMESTAMP_MAX_MICRO_SECONDS: + return 0 + return timestamp + + @classmethod + def FromHfsTime(cls, hfs_time, timezone=pytz.utc, is_dst=False): + """Converts a HFS time to a timestamp. + + HFS time is the same as HFS+ time, except stored in the local + timezone of the user. + + Args: + hfs_time: Timestamp in the hfs format (32 bit unsigned int). + timezone: The timezone object of the system's local time. + is_dst: A boolean to indicate the timestamp is corrected for daylight + savings time (DST) only used for the DST transition period. + The default is false. + + Returns: + An integer containing the timestamp or 0 on error. + """ + timestamp_local = cls.FromHfsPlusTime(hfs_time) + return cls.LocaltimeToUTC(timestamp_local, timezone, is_dst) + + @classmethod + def FromHfsPlusTime(cls, hfs_time): + """Converts a HFS+ time to a timestamp. 
+
+    In HFS+ date and time values are stored in an unsigned 32-bit integer
+    containing the number of seconds since January 1, 1904 at 00:00:00
+    (midnight) UTC (GMT).
+
+    Args:
+      hfs_time: The timestamp in HFS+ format.
+
+    Returns:
+      An integer containing the timestamp or 0 on error.
+    """
+    return cls.FromPosixTime(hfs_time - cls.HFSTIME_TO_POSIX_BASE)
+
+  @classmethod
+  def FromJavaTime(cls, java_time):
+    """Converts a Java time to a timestamp.
+
+    Java time is the number of milliseconds since
+    January 1, 1970, 00:00:00 UTC.
+
+    URL: http://docs.oracle.com/javase/7/docs/api/
+         java/sql/Timestamp.html#getTime%28%29
+
+    Args:
+      java_time: The Java Timestamp.
+
+    Returns:
+      An integer containing the timestamp or 0 on error.
+    """
+    return java_time * cls.MILLI_SECONDS_TO_MICRO_SECONDS
+
+  @classmethod
+  def FromPosixTime(cls, posix_time):
+    """Converts a POSIX timestamp into a timestamp.
+
+    The POSIX time is a signed 32-bit or 64-bit value containing:
+      seconds since 1970-01-01 00:00:00
+
+    Args:
+      posix_time: The POSIX timestamp.
+
+    Returns:
+      An integer containing the timestamp or 0 on error.
+    """
+    if (posix_time < cls.TIMESTAMP_MIN_SECONDS or
+        posix_time > cls.TIMESTAMP_MAX_SECONDS):
+      return 0
+    return int(posix_time) * cls.MICRO_SECONDS_PER_SECOND
+
+  @classmethod
+  def FromPosixTimeWithMicrosecond(cls, posix_time, microsecond):
+    """Converts a POSIX timestamp with microsecond into a timestamp.
+
+    The POSIX time is a signed 32-bit or 64-bit value containing:
+      seconds since 1970-01-01 00:00:00
+
+    Args:
+      posix_time: The POSIX timestamp.
+      microsecond: The microseconds to add to the timestamp.
+
+    Returns:
+      An integer containing the timestamp or 0 on error.
+    """
+    timestamp = cls.FromPosixTime(posix_time)
+    if not timestamp:
+      return 0
+    return timestamp + microsecond
+
+  @classmethod
+  def FromPythonDatetime(cls, datetime_object):
+    """Converts a Python datetime object into a timestamp."""
+    if not isinstance(datetime_object, datetime.datetime):
+      return 0
+
+    posix_epoch = int(calendar.timegm(datetime_object.utctimetuple()))
+    epoch = cls.FromPosixTime(posix_epoch)
+    return epoch + datetime_object.microsecond
+
+  @classmethod
+  def FromTimeParts(
+      cls, year, month, day, hour, minutes, seconds, microseconds=0,
+      timezone=pytz.utc):
+    """Converts a list of time entries to a timestamp.
+
+    Args:
+      year: An integer representing the year.
+      month: An integer between 1 and 12.
+      day: An integer representing the day of the month.
+      hour: An integer representing the hour, 0 <= hour < 24.
+      minutes: An integer, 0 <= minute < 60.
+      seconds: An integer, 0 <= second < 60.
+      microseconds: Optional number of microseconds ranging from:
+                    0 <= microsecond < 1000000. The default is 0.
+      timezone: Optional timezone (instance of pytz.timezone).
+                The default is UTC.
+
+    Returns:
+      An integer containing the timestamp or 0 on error.
+ """ + try: + date = datetime.datetime( + year, month, day, hour, minutes, seconds, microseconds) + except ValueError as exception: + logging.warning(( + u'Unable to create timestamp from {0:04d}-{1:02d}-{2:02d} ' + u'{3:02d}:{4:02d}:{5:02d}.{6:06d} with error: {7:s}').format( + year, month, day, hour, minutes, seconds, microseconds, + exception)) + return 0 + + if type(timezone) is str: + timezone = pytz.timezone(timezone) + + date_use = timezone.localize(date) + epoch = int(calendar.timegm(date_use.utctimetuple())) + + return cls.FromPosixTime(epoch) + microseconds + + @classmethod + def FromTimeString( + cls, time_string, timezone=pytz.utc, dayfirst=False, + gmt_as_timezone=True): + """Converts a string containing a date and time value into a timestamp. + + Args: + time_string: String that contains a date and time value. + timezone: Optional timezone object (instance of pytz.timezone) that + the data and time value in the string represents. This value + is used when the timezone cannot be determined from the string. + dayfirst: An optional boolean argument. If set to true then the + parser will change the precedence in which it parses timestamps + from MM-DD-YYYY to DD-MM-YYYY (and YYYY-MM-DD will be + YYYY-DD-MM, etc). + gmt_as_timezone: Sometimes the dateutil parser will interpret GMT and UTC + the same way, that is not make a distinction. By default + this is set to true, that is GMT can be intepreted + differently than UTC. If that is not the expected result + this attribute can be set to false. + + Returns: + An integer containing the timestamp or 0 on error. + """ + datetime_object = StringToDatetime( + time_string, timezone=timezone, dayfirst=dayfirst, + gmt_as_timezone=gmt_as_timezone) + return cls.FromPythonDatetime(datetime_object) + + @classmethod + def FromWebKitTime(cls, webkit_time): + """Converts a WebKit time into a timestamp. + + The WebKit time is a 64-bit value containing: + micro seconds since 1601-01-01 00:00:00 + + Args: + webkit_time: The 64-bit WebKit time timestamp. + + Returns: + An integer containing the timestamp or 0 on error. + """ + if webkit_time < (cls.TIMESTAMP_MIN_MICRO_SECONDS + + cls.WEBKIT_TIME_TO_POSIX_BASE): + return 0 + return webkit_time - cls.WEBKIT_TIME_TO_POSIX_BASE + + @classmethod + def GetNow(cls): + """Retrieves the current time (now) as a timestamp in UTC.""" + time_elements = time.gmtime() + return calendar.timegm(time_elements) * 1000000 + + @classmethod + def IsLeapYear(cls, year): + """Determines if a year is a leap year. + + A leap year is dividable by 4 and not by 100 or by 400 + without a remainder. + + Args: + year: The year as in 1970. + + Returns: + A boolean value indicating the year is a leap year. + """ + return (year % 4 == 0 and year % 100 != 0) or year % 400 == 0 + + @classmethod + def LocaltimeToUTC(cls, timestamp, timezone, is_dst=False): + """Converts the timestamp in localtime of the timezone to UTC. + + Args: + timestamp: An integer containing the timestamp. + timezone: The timezone (pytz.timezone) object. + is_dst: A boolean to indicate the timestamp is corrected for daylight + savings time (DST) only used for the DST transition period. + The default is false. + + Returns: + An integer containing the timestamp or 0 on error. + """ + if timezone and timezone != pytz.utc: + datetime_object = ( + datetime.datetime(1970, 1, 1, 0, 0, 0, 0, tzinfo=None) + + datetime.timedelta(microseconds=timestamp)) + + # Check if timezone is UTC since utcoffset() does not support is_dst + # for UTC and will raise. 
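
The guard described in the comment above can be seen directly in pytz (assumed installed, as elsewhere in this module): DstTzInfo based zones accept an is_dst hint, while the plain UTC singleton does not.

    import datetime

    import pytz

    datetime_object = datetime.datetime(2013, 1, 1, 0, 0, 0)

    # DstTzInfo based zones take an is_dst hint to resolve ambiguous times.
    print(pytz.timezone('CET').utcoffset(datetime_object, is_dst=False))  # 1:00:00

    # pytz.utc.utcoffset() has no is_dst parameter; passing one raises TypeError.
    try:
      pytz.utc.utcoffset(datetime_object, is_dst=False)
    except TypeError:
      print('pytz.utc.utcoffset() does not accept is_dst')
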
+ datetime_delta = timezone.utcoffset(datetime_object, is_dst=is_dst) + seconds_delta = int(datetime_delta.total_seconds()) + timestamp -= seconds_delta * cls.MICRO_SECONDS_PER_SECOND + + return timestamp + + @classmethod + def RoundToSeconds(cls, timestamp): + """Takes a timestamp value and rounds it to a second precision.""" + leftovers = timestamp % cls.MICRO_SECONDS_PER_SECOND + scrubbed = timestamp - leftovers + rounded = round(float(leftovers) / cls.MICRO_SECONDS_PER_SECOND) + + return int(scrubbed + rounded * cls.MICRO_SECONDS_PER_SECOND) + + +def StringToDatetime( + time_string, timezone=pytz.utc, dayfirst=False, gmt_as_timezone=True): + """Converts a string representation of a timestamp into a datetime object. + + Args: + time_string: String that contains a date and time value. + timezone: Optional timezone object (instance of pytz.timezone) that + the data and time value in the string represents. This value + is used when the timezone cannot be determined from the string. + dayfirst: An optional boolean argument. If set to true then the + parser will change the precedence in which it parses timestamps + from MM-DD-YYYY to DD-MM-YYYY (and YYYY-MM-DD will be YYYY-DD-MM, + etc). + gmt_as_timezone: Sometimes the dateutil parser will interpret GMT and UTC + the same way, that is not make a distinction. By default + this is set to true, that is GMT can be intepreted + differently than UTC. If that is not the expected result + this attribute can be set to false. + + Returns: + A datetime object. + """ + if not gmt_as_timezone and time_string.endswith(' GMT'): + time_string = u'{0:s}UTC'.format(time_string[:-3]) + + try: + datetime_object = dateutil.parser.parse(time_string, dayfirst=dayfirst) + + except (TypeError, ValueError) as exception: + logging.error( + u'Unable to copy {0:s} to a datetime object with error: {1:s}'.format( + time_string, exception)) + return datetime.datetime(1970, 1, 1, 0, 0, 0, 0, tzinfo=pytz.utc) + + if datetime_object.tzinfo: + return datetime_object.astimezone(pytz.utc) + + return timezone.localize(datetime_object) + + +def GetCurrentYear(): + """Determines the current year.""" + datetime_object = datetime.datetime.now() + return datetime_object.year diff --git a/plaso/lib/timelib_test.py b/plaso/lib/timelib_test.py new file mode 100644 index 0000000..4af7054 --- /dev/null +++ b/plaso/lib/timelib_test.py @@ -0,0 +1,531 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a unit test for the timelib in Plaso.""" + +import calendar +import datetime +import unittest + +from plaso.lib import timelib + +import pytz + + +def CopyStringToTimestamp(time_string): + """Copies a string containing a date and time value to a timestamp. + + Test function that does not rely on dateutil parser. 
+ + Args: + time_string: A string containing a date and time value formatted as: + YYYY-MM-DD hh:mm:ss.######[+-]##:## + Where # are numeric digits ranging from 0 to 9 and the seconds + fraction can be either 3 or 6 digits. Both the seconds fraction + and timezone offset are optional. The default timezone is UTC. + + Returns: + An integer containing the timestamp. + + Raises: + ValueError: if the time string is invalid or not supported. + """ + time_string_length = len(time_string) + + # The time string should at least contain 'YYYY-MM-DD hh:mm:ss'. + if (time_string_length < 19 or time_string[4] != '-' or + time_string[7] != '-' or time_string[10] != ' ' or + time_string[13] != ':' or time_string[16] != ':'): + raise ValueError(u'Invalid time string.') + + try: + year = int(time_string[0:4], 10) + except ValueError: + raise ValueError(u'Unable to parse year.') + + try: + month = int(time_string[5:7], 10) + except ValueError: + raise ValueError(u'Unable to parse month.') + + if month not in range(1, 13): + raise ValueError(u'Month value out of bounds.') + + try: + day_of_month = int(time_string[8:10], 10) + except ValueError: + raise ValueError(u'Unable to parse day of month.') + + if day_of_month not in range(1, 32): + raise ValueError(u'Day of month value out of bounds.') + + try: + hours = int(time_string[11:13], 10) + except ValueError: + raise ValueError(u'Unable to parse hours.') + + if hours not in range(0, 24): + raise ValueError(u'Hours value out of bounds.') + + try: + minutes = int(time_string[14:16], 10) + except ValueError: + raise ValueError(u'Unable to parse minutes.') + + if minutes not in range(0, 60): + raise ValueError(u'Minutes value out of bounds.') + + try: + seconds = int(time_string[17:19], 10) + except ValueError: + raise ValueError(u'Unable to parse day of seconds.') + + if seconds not in range(0, 60): + raise ValueError(u'Seconds value out of bounds.') + + micro_seconds = 0 + timezone_offset = 0 + + if time_string_length > 19: + if time_string[19] != '.': + timezone_index = 19 + else: + for timezone_index in range(19, time_string_length): + if time_string[timezone_index] in ['+', '-']: + break + + # The calculation that follow rely on the timezone index to point + # beyond the string in case no timezone offset was defined. + if timezone_index == time_string_length - 1: + timezone_index += 1 + + if timezone_index > 19: + fraction_of_seconds_length = timezone_index - 20 + if fraction_of_seconds_length not in [3, 6]: + raise ValueError(u'Invalid time string.') + + try: + micro_seconds = int(time_string[20:timezone_index], 10) + except ValueError: + raise ValueError(u'Unable to parse fraction of seconds.') + + if fraction_of_seconds_length == 3: + micro_seconds *= 1000 + + if timezone_index < time_string_length: + if (time_string_length - timezone_index != 6 or + time_string[timezone_index + 3] != ':'): + raise ValueError(u'Invalid time string.') + + try: + timezone_offset = int(time_string[ + timezone_index + 1:timezone_index + 3]) + except ValueError: + raise ValueError(u'Unable to parse timezone hours offset.') + + if timezone_offset not in range(0, 24): + raise ValueError(u'Timezone hours offset value out of bounds.') + + # Note that when the sign of the timezone offset is negative + # the difference needs to be added. We do so by flipping the sign. 
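
A worked instance of that sign flip, with values chosen purely for illustration: for a string ending in '+02:00' the local time is ahead of UTC, so two hours must be subtracted, which the code achieves by negating the parsed offset and then adding it.

    import calendar

    # '2013-07-08 12:00:00+02:00': treat the local components as if they were
    # UTC, then add the sign-flipped offset to land on the real UTC time.
    local_as_utc = calendar.timegm((2013, 7, 8, 12, 0, 0))
    timezone_offset = -(2 * 60) * 60  # parsed '+02:00', sign flipped, in seconds
    assert local_as_utc + timezone_offset == calendar.timegm((2013, 7, 8, 10, 0, 0))
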
+ if time_string[timezone_index] == '-': + timezone_offset *= 60 + else: + timezone_offset *= -60 + + try: + timezone_offset += int(time_string[ + timezone_index + 4:timezone_index + 6]) + except ValueError: + raise ValueError(u'Unable to parse timezone minutes offset.') + + timezone_offset *= 60 + + timestamp = int(calendar.timegm(( + year, month, day_of_month, hours, minutes, seconds))) + + return ((timestamp + timezone_offset) * 1000000) + micro_seconds + + +class TimeLibUnitTest(unittest.TestCase): + """A unit test for the timelib.""" + + def testCocoaTime(self): + """Tests the Cocoa timestamp conversion.""" + self.assertEquals( + timelib.Timestamp.FromCocoaTime(395011845), + CopyStringToTimestamp('2013-07-08 21:30:45')) + + self.assertEquals( + timelib.Timestamp.FromCocoaTime(395353142), + CopyStringToTimestamp('2013-07-12 20:19:02')) + + self.assertEquals( + timelib.Timestamp.FromCocoaTime(394993669), + CopyStringToTimestamp('2013-07-08 16:27:49')) + + def testHFSTimes(self): + """Tests the HFS timestamp conversion.""" + self.assertEquals( + timelib.Timestamp.FromHfsTime( + 3458215528, timezone=pytz.timezone('EST5EDT'), is_dst=True), + CopyStringToTimestamp('2013-08-01 15:25:28-04:00')) + + self.assertEquals( + timelib.Timestamp.FromHfsPlusTime(3458215528), + CopyStringToTimestamp('2013-08-01 15:25:28')) + + self.assertEquals( + timelib.Timestamp.FromHfsPlusTime(3413373928), + CopyStringToTimestamp('2012-02-29 15:25:28')) + + def testTimestampIsLeapYear(self): + """Tests the is leap year check.""" + self.assertEquals(timelib.Timestamp.IsLeapYear(2012), True) + self.assertEquals(timelib.Timestamp.IsLeapYear(2013), False) + self.assertEquals(timelib.Timestamp.IsLeapYear(2000), True) + self.assertEquals(timelib.Timestamp.IsLeapYear(1900), False) + + def testTimestampDaysInMonth(self): + """Tests the days in month function.""" + self.assertEquals(timelib.Timestamp.DaysInMonth(0, 2013), 31) + self.assertEquals(timelib.Timestamp.DaysInMonth(1, 2013), 28) + self.assertEquals(timelib.Timestamp.DaysInMonth(1, 2012), 29) + self.assertEquals(timelib.Timestamp.DaysInMonth(2, 2013), 31) + self.assertEquals(timelib.Timestamp.DaysInMonth(3, 2013), 30) + self.assertEquals(timelib.Timestamp.DaysInMonth(4, 2013), 31) + self.assertEquals(timelib.Timestamp.DaysInMonth(5, 2013), 30) + self.assertEquals(timelib.Timestamp.DaysInMonth(6, 2013), 31) + self.assertEquals(timelib.Timestamp.DaysInMonth(7, 2013), 31) + self.assertEquals(timelib.Timestamp.DaysInMonth(8, 2013), 30) + self.assertEquals(timelib.Timestamp.DaysInMonth(9, 2013), 31) + self.assertEquals(timelib.Timestamp.DaysInMonth(10, 2013), 30) + self.assertEquals(timelib.Timestamp.DaysInMonth(11, 2013), 31) + + with self.assertRaises(ValueError): + timelib.Timestamp.DaysInMonth(-1, 2013) + + with self.assertRaises(ValueError): + timelib.Timestamp.DaysInMonth(12, 2013) + + def testTimestampDaysInYear(self): + """Test the days in year function.""" + self.assertEquals(timelib.Timestamp.DaysInYear(2013), 365) + self.assertEquals(timelib.Timestamp.DaysInYear(2012), 366) + + def testTimestampDayOfYear(self): + """Test the day of year function.""" + self.assertEquals(timelib.Timestamp.DayOfYear(0, 0, 2013), 0) + self.assertEquals(timelib.Timestamp.DayOfYear(0, 2, 2013), 31 + 28) + self.assertEquals(timelib.Timestamp.DayOfYear(0, 2, 2012), 31 + 29) + self.assertEquals(timelib.Timestamp.DayOfYear(0, 11, 2013), + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30) + + def testTimestampFromDelphiTime(self): + """Test the Delphi date time conversion.""" + 
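
FromDelphiTime itself is not visible in this import, but the test value below checks out if the conversion treats its input as fractional days since the Delphi epoch of 1899-12-30, truncated to whole seconds. A hedged reimplementation, not the original code:

    import calendar

    # Delphi's TDateTime epoch is 1899-12-30; 25569 days later is 1970-01-01.
    DELPHI_TO_POSIX_OFFSET_DAYS = 25569.0

    def FromDelphiTimeSketch(delphi_time):
      # Fractional days since 1899-12-30 -> microseconds since the POSIX epoch.
      posix_seconds = int((delphi_time - DELPHI_TO_POSIX_OFFSET_DAYS) * 86400)
      return posix_seconds * 1000000

    assert FromDelphiTimeSketch(41443.8263953) == (
        calendar.timegm((2013, 6, 18, 19, 50, 0)) * 1000000)
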
self.assertEquals( + timelib.Timestamp.FromDelphiTime(41443.8263953), + CopyStringToTimestamp('2013-06-18 19:50:00')) + + def testTimestampFromFatDateTime(self): + """Test the FAT date time conversion.""" + self.assertEquals( + timelib.Timestamp.FromFatDateTime(0xa8d03d0c), + CopyStringToTimestamp('2010-08-12 21:06:32')) + + # Invalid number of seconds. + fat_date_time = (0xa8d03d0c & ~(0x1f << 16)) | ((30 & 0x1f) << 16) + self.assertEquals(timelib.Timestamp.FromFatDateTime(fat_date_time), 0) + + # Invalid number of minutes. + fat_date_time = (0xa8d03d0c & ~(0x3f << 21)) | ((60 & 0x3f) << 21) + self.assertEquals(timelib.Timestamp.FromFatDateTime(fat_date_time), 0) + + # Invalid number of hours. + fat_date_time = (0xa8d03d0c & ~(0x1f << 27)) | ((24 & 0x1f) << 27) + self.assertEquals(timelib.Timestamp.FromFatDateTime(fat_date_time), 0) + + # Invalid day of month. + fat_date_time = (0xa8d03d0c & ~0x1f) | (32 & 0x1f) + self.assertEquals(timelib.Timestamp.FromFatDateTime(fat_date_time), 0) + + # Invalid month. + fat_date_time = (0xa8d03d0c & ~(0x0f << 5)) | ((13 & 0x0f) << 5) + self.assertEquals(timelib.Timestamp.FromFatDateTime(fat_date_time), 0) + + def testTimestampFromWebKitTime(self): + """Test the WebKit time conversion.""" + self.assertEquals( + timelib.Timestamp.FromWebKitTime(0x2dec3d061a9bfb), + CopyStringToTimestamp('2010-08-12 21:06:31.546875')) + + webkit_time = 86400 * 1000000 + self.assertEquals( + timelib.Timestamp.FromWebKitTime(webkit_time), + CopyStringToTimestamp('1601-01-02 00:00:00')) + + # WebKit time that exceeds lower bound. + webkit_time = -((1 << 63L) - 1) + self.assertEquals(timelib.Timestamp.FromWebKitTime(webkit_time), 0) + + def testTimestampFromFiletime(self): + """Test the FILETIME conversion.""" + self.assertEquals( + timelib.Timestamp.FromFiletime(0x01cb3a623d0a17ce), + CopyStringToTimestamp('2010-08-12 21:06:31.546875')) + + filetime = 86400 * 10000000 + self.assertEquals( + timelib.Timestamp.FromFiletime(filetime), + CopyStringToTimestamp('1601-01-02 00:00:00')) + + # FILETIME that exceeds lower bound. + filetime = -1 + self.assertEquals(timelib.Timestamp.FromFiletime(filetime), 0) + + def testTimestampFromPosixTime(self): + """Test the POSIX time conversion.""" + self.assertEquals( + timelib.Timestamp.FromPosixTime(1281647191), + CopyStringToTimestamp('2010-08-12 21:06:31')) + + self.assertEquals( + timelib.Timestamp.FromPosixTime(-122557518), + timelib.Timestamp.FromTimeString('1966-02-12 1966 12:14:42 UTC')) + + # POSIX time that exceeds upper bound. + self.assertEquals(timelib.Timestamp.FromPosixTime(9223372036855), 0) + + # POSIX time that exceeds lower bound. 
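
Unpacking the first FAT test value by hand shows how the bit fields documented in FromFatDateTime line up: 0xa8d03d0c decodes to 2010-08-12 21:06:32.

    fat_date_time = 0xa8d03d0c

    date_part = fat_date_time & 0xffff
    time_part = fat_date_time >> 16

    day = date_part & 0x1f                    # 12
    month = (date_part >> 5) & 0x0f           # 8 (August)
    year = 1980 + ((date_part >> 9) & 0x7f)   # 2010
    seconds = (time_part & 0x1f) * 2          # 32
    minutes = (time_part >> 5) & 0x3f         # 6
    hours = (time_part >> 11) & 0x1f          # 21

    assert (year, month, day, hours, minutes, seconds) == (2010, 8, 12, 21, 6, 32)
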
+ self.assertEquals(timelib.Timestamp.FromPosixTime(-9223372036855), 0) + + def testMonthDict(self): + """Test the month dict, both inside and outside of scope.""" + self.assertEquals(timelib.MONTH_DICT['nov'], 11) + self.assertEquals(timelib.MONTH_DICT['jan'], 1) + self.assertEquals(timelib.MONTH_DICT['may'], 5) + + month = timelib.MONTH_DICT.get('doesnotexist') + self.assertEquals(month, None) + + def testLocaltimeToUTC(self): + """Test the localtime to UTC conversion.""" + timezone = pytz.timezone('CET') + + local_timestamp = CopyStringToTimestamp('2013-01-01 01:00:00') + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone), + CopyStringToTimestamp('2013-01-01 00:00:00')) + + local_timestamp = CopyStringToTimestamp('2013-07-01 02:00:00') + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone), + CopyStringToTimestamp('2013-07-01 00:00:00')) + + # In the local timezone this is a non-existent timestamp. + local_timestamp = CopyStringToTimestamp('2013-03-31 02:00:00') + with self.assertRaises(pytz.NonExistentTimeError): + timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone, is_dst=None) + + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC( + local_timestamp, timezone, is_dst=True), + CopyStringToTimestamp('2013-03-31 00:00:00')) + + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC( + local_timestamp, timezone, is_dst=False), + CopyStringToTimestamp('2013-03-31 01:00:00')) + + # In the local timezone this is an ambiguous timestamp. + local_timestamp = CopyStringToTimestamp('2013-10-27 02:30:00') + + with self.assertRaises(pytz.AmbiguousTimeError): + timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone, is_dst=None) + + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC( + local_timestamp, timezone, is_dst=True), + CopyStringToTimestamp('2013-10-27 00:30:00')) + + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone), + CopyStringToTimestamp('2013-10-27 01:30:00')) + + # Use the UTC timezone. + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC(local_timestamp, pytz.utc), + local_timestamp) + + # Use a timezone in the Western Hemisphere. + timezone = pytz.timezone('EST') + + local_timestamp = CopyStringToTimestamp('2013-01-01 00:00:00') + self.assertEquals( + timelib.Timestamp.LocaltimeToUTC(local_timestamp, timezone), + CopyStringToTimestamp('2013-01-01 05:00:00')) + + def testCopyToDatetime(self): + """Test the copy to datetime object.""" + timezone = pytz.timezone('CET') + + timestamp = CopyStringToTimestamp('2013-03-14 20:20:08.850041') + self.assertEquals( + timelib.Timestamp.CopyToDatetime(timestamp, timezone), + datetime.datetime(2013, 3, 14, 21, 20, 8, 850041, tzinfo=timezone)) + + def testCopyToPosix(self): + """Test converting microseconds to seconds.""" + timestamp = CopyStringToTimestamp('2013-10-01 12:00:00') + self.assertEquals( + timelib.Timestamp.CopyToPosix(timestamp), + timestamp // 1000000) + + def testTimestampFromTimeString(self): + """The the FromTimeString function.""" + # Test daylight savings. + expected_timestamp = CopyStringToTimestamp('2013-10-01 12:00:00') + + # Check certain variance of this timestamp. 
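
The DST edge cases exercised above come straight from pytz (assumed installed): localize() with is_dst=None refuses both the nonexistent spring-forward hour and the ambiguous fall-back hour, while an explicit is_dst hint picks one of the two readings.

    import datetime

    import pytz

    timezone = pytz.timezone('CET')

    # 02:00 on 2013-03-31 never happened in CET (clocks jumped to 03:00).
    nonexistent = datetime.datetime(2013, 3, 31, 2, 0, 0)
    try:
      timezone.localize(nonexistent, is_dst=None)
    except pytz.NonExistentTimeError:
      print('2013-03-31 02:00 does not exist in CET')

    # 02:30 on 2013-10-27 happened twice (before and after fall-back).
    ambiguous = datetime.datetime(2013, 10, 27, 2, 30, 0)
    print(timezone.localize(ambiguous, is_dst=True).utcoffset())   # 2:00:00
    print(timezone.localize(ambiguous, is_dst=False).utcoffset())  # 1:00:00
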
+ timestamp = timelib.Timestamp.FromTimeString( + '2013-10-01 14:00:00', pytz.timezone('Europe/Rome')) + self.assertEquals(timestamp, expected_timestamp) + + timestamp = timelib.Timestamp.FromTimeString( + '2013-10-01 12:00:00', pytz.timezone('UTC')) + self.assertEquals(timestamp, expected_timestamp) + + timestamp = timelib.Timestamp.FromTimeString( + '2013-10-01 05:00:00', pytz.timezone('PST8PDT')) + self.assertEquals(timestamp, expected_timestamp) + + # Now to test outside of the daylight savings. + expected_timestamp = CopyStringToTimestamp('2014-02-01 12:00:00') + + timestamp = timelib.Timestamp.FromTimeString( + '2014-02-01 13:00:00', pytz.timezone('Europe/Rome')) + self.assertEquals(timestamp, expected_timestamp) + + timestamp = timelib.Timestamp.FromTimeString( + '2014-02-01 12:00:00', pytz.timezone('UTC')) + self.assertEquals(timestamp, expected_timestamp) + + timestamp = timelib.Timestamp.FromTimeString( + '2014-02-01 04:00:00', pytz.timezone('PST8PDT')) + self.assertEquals(timestamp, expected_timestamp) + + # Define two timestamps, one being GMT and the other UTC. + time_string_utc = 'Wed 05 May 2010 03:52:31 UTC' + time_string_gmt = 'Wed 05 May 2010 03:52:31 GMT' + + timestamp_utc = timelib.Timestamp.FromTimeString(time_string_utc) + timestamp_gmt = timelib.Timestamp.FromTimeString(time_string_gmt) + + # Test if these two are different, and if so, then we'll try again + # using the 'gmt_is_utc' flag, which then should result to the same + # results. + if timestamp_utc != timestamp_gmt: + self.assertEquals(timestamp_utc, timelib.Timestamp.FromTimeString( + time_string_gmt, gmt_as_timezone=False)) + + def testRoundTimestamp(self): + """Test the RoundToSeconds function.""" + # Should be rounded up. + test_one = 442813351785412 + # Should be rounded down. + test_two = 1384381247271976 + + self.assertEquals( + timelib.Timestamp.RoundToSeconds(test_one), 442813352000000) + self.assertEquals( + timelib.Timestamp.RoundToSeconds(test_two), 1384381247000000) + + def testTimestampFromTimeParts(self): + """Test the FromTimeParts function.""" + timestamp = timelib.Timestamp.FromTimeParts( + 2013, 6, 25, 22, 19, 46, 0, timezone=pytz.timezone('PST8PDT')) + self.assertEquals( + timestamp, CopyStringToTimestamp('2013-06-25 22:19:46-07:00')) + + timestamp = timelib.Timestamp.FromTimeParts(2013, 6, 26, 5, 19, 46) + self.assertEquals( + timestamp, CopyStringToTimestamp('2013-06-26 05:19:46')) + + timestamp = timelib.Timestamp.FromTimeParts( + 2013, 6, 26, 5, 19, 46, 542) + self.assertEquals( + timestamp, CopyStringToTimestamp('2013-06-26 05:19:46.000542')) + + def _TestStringToDatetime( + self, expected_timestamp, time_string, timezone=pytz.utc, dayfirst=False): + """Tests the StringToDatetime function. + + Args: + expected_timestamp: The expected timesamp. + time_string: String that contains a date and time value. + timezone: The timezone (pytz.timezone) object. + dayfirst: Change precedence of day vs. month. + + Returns: + A result object. + """ + date_time = timelib.StringToDatetime( + time_string, timezone=timezone, dayfirst=dayfirst) + timestamp = int(calendar.timegm((date_time.utctimetuple()))) + self.assertEquals(timestamp, expected_timestamp) + + def testStringToDatetime(self): + """Test the StringToDatetime function.""" + self._TestStringToDatetime( + 471953580, '12-15-1984 05:13:00', timezone=pytz.timezone('EST5EDT')) + + # Swap day and month. 
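
Restating RoundToSeconds next to its test values makes the behavior explicit: the fractional second is rounded to the nearest whole second and everything below second precision is scrubbed. This is a self-contained restatement of the method shown earlier, not new logic.

    MICRO_SECONDS_PER_SECOND = 1000000

    def RoundToSecondsSketch(timestamp):
      # Split off the sub-second part, round it, then re-assemble.
      leftovers = timestamp % MICRO_SECONDS_PER_SECOND
      scrubbed = timestamp - leftovers
      rounded = round(float(leftovers) / MICRO_SECONDS_PER_SECOND)
      return int(scrubbed + rounded * MICRO_SECONDS_PER_SECOND)

    # 785412 microseconds rounds up, 271976 rounds down (the test values above).
    assert RoundToSecondsSketch(442813351785412) == 442813352000000
    assert RoundToSecondsSketch(1384381247271976) == 1384381247000000
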
+ self._TestStringToDatetime( + 466420380, '12-10-1984 05:13:00', timezone=pytz.timezone('EST5EDT'), + dayfirst=True) + + self._TestStringToDatetime(471953580, '12-15-1984 10:13:00Z') + + # Setting the timezone for string that already contains a timezone + # indicator should not affect the conversion. + self._TestStringToDatetime( + 471953580, '12-15-1984 10:13:00Z', timezone=pytz.timezone('EST5EDT')) + + self._TestStringToDatetime(471953580, '15/12/1984 10:13:00Z') + + self._TestStringToDatetime(471953580, '15-12-84 10:13:00Z') + + self._TestStringToDatetime( + 471967980, '15-12-84 10:13:00-04', timezone=pytz.timezone('EST5EDT')) + + self._TestStringToDatetime( + 0, 'thisisnotadatetime', timezone=pytz.timezone('EST5EDT')) + + self._TestStringToDatetime( + 471953580, '12-15-1984 04:13:00', + timezone=pytz.timezone('America/Chicago')) + + self._TestStringToDatetime( + 458712780, '07-14-1984 23:13:00', + timezone=pytz.timezone('America/Chicago')) + + self._TestStringToDatetime( + 471964380, '12-15-1984 05:13:00', timezone=pytz.timezone('US/Pacific')) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/lib/utils.py b/plaso/lib/utils.py new file mode 100644 index 0000000..3e03b70 --- /dev/null +++ b/plaso/lib/utils.py @@ -0,0 +1,199 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains utility functions.""" + +import logging + +from plaso.lib import errors +from plaso.lib import lexer + + +RESERVED_VARIABLES = frozenset( + ['username', 'inode', 'hostname', 'body', 'parser', 'regvalue', 'timestamp', + 'timestamp_desc', 'source_short', 'source_long', 'timezone', 'filename', + 'display_name', 'pathspec', 'offset', 'store_number', 'store_index', + 'tag', 'data_type', 'metadata', 'http_headers', 'query', 'mapped_files', + 'uuid']) + + +def IsText(bytes_in, encoding=None): + """Examine the bytes in and determine if they are indicative of a text. + + Parsers need quick and at least semi reliable method of discovering whether + or not a particular byte stream is a text or resembles text or not. This can + be used in text parsers to determine if a file is a text file or not for + instance. + + The method assumes the byte sequence is either ASCII, UTF-8, UTF-16 or method + supplied character encoding. Otherwise it will make the assumption the byte + sequence is not text, but a byte sequence. + + Args: + bytes_in: The byte sequence passed to the method that needs examination. + encoding: Optional encoding to test, if not defined only ASCII, UTF-8 and + UTF-16 are tried. + + Returns: + Boolean value indicating whether or not the byte sequence is a text or not. + """ + # TODO: Improve speed and accuracy of this method. + # Start with the assumption we are dealing with a text. + is_ascii = True + + # Check if this is ASCII text string. 
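
The dayfirst flag exercised in the StringToDatetime tests above maps directly onto python-dateutil's parser (python-dateutil assumed installed, as the imports in timelib.py require):

    import dateutil.parser

    # '12-10-1984' is December 10th by default, October 12th with dayfirst=True.
    print(dateutil.parser.parse('12-10-1984 05:13:00'))                 # 1984-12-10 05:13:00
    print(dateutil.parser.parse('12-10-1984 05:13:00', dayfirst=True))  # 1984-10-12 05:13:00
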
+ for char in bytes_in: + if not 31 < ord(char) < 128: + is_ascii = False + break + + # We have an ASCII string. + if is_ascii: + return is_ascii + + # Is this already a unicode text? + if type(bytes_in) == unicode: + return True + + # Check if this is UTF-8 + try: + _ = bytes_in.decode('utf-8') + return True + except UnicodeDecodeError: + pass + + # TODO: UTF 16 decode is successful in too + # many edge cases where we are not really dealing with + # a text at all. Leaving this out for now, consider + # re-enabling or making a better determination. + #try: + # _ = bytes_in.decode('utf-16-le') + # return True + #except UnicodeDecodeError: + # pass + + if encoding: + try: + _ = bytes_in.decode(encoding) + return True + except UnicodeDecodeError: + pass + except LookupError: + logging.error( + u'String encoding not recognized: {0:s}'.format(encoding)) + + return False + + +def GetBaseName(path): + """Returns back a basename for a path (could be Windows or *NIX separated).""" + # First check the case where both forward and backward slash are in the path. + if '/' and '\\' in path: + # Let's count slashes and guess which one is the right one. + forward_count = len(path.split('/')) + backward_count = len(path.split('\\')) + + if forward_count > backward_count: + _, _, base = path.rpartition('/') + else: + _, _, base = path.rpartition('\\') + + return base + + # Now we are sure there is only one type of separators. + if '/' in path: + _, _, base = path.rpartition('/') + else: + _, _, base = path.rpartition('\\') + + return base + + +def GetUnicodeString(string): + """Converts the string to Unicode if necessary.""" + if type(string) != unicode: + return str(string).decode('utf8', 'ignore') + return string + + +class PathReplacer(lexer.Lexer): + """Replace path variables with values gathered from earlier preprocessing.""" + + tokens = [ + lexer.Token('.', '{{([^}]+)}}', 'ReplaceVariable', ''), + lexer.Token('.', '{([^}]+)}', 'ReplaceString', ''), + lexer.Token('.', '([^{])', 'ParseString', ''), + ] + + def __init__(self, pre_obj, data=''): + """Constructor for a path replacer.""" + super(PathReplacer, self).__init__(data) + self._path = [] + self._pre_obj = pre_obj + + def GetPath(self): + """Run the lexer and replace path.""" + while True: + _ = self.NextToken() + if self.Empty(): + break + + return u''.join(self._path) + + def ParseString(self, match, **_): + """Append a string to the path.""" + self._path.append(match.group(1)) + + def ReplaceVariable(self, match, **_): + """Replace a string that should not be a variable.""" + self._path.append(u'{{{0:s}}}'.format(match.group(1))) + + def ReplaceString(self, match, **_): + """Replace a variable with a given attribute.""" + replace = getattr(self._pre_obj, match.group(1), None) + + if replace: + self._path.append(replace) + else: + raise errors.PathNotFound( + u'Path variable: {} not discovered yet.'.format(match.group(1))) + + +def GetInodeValue(inode_raw): + """Read in a 'raw' inode value and try to convert it into an integer. + + Args: + inode_raw: A string or an int inode value. + + Returns: + An integer inode value. + """ + if type(inode_raw) in (int, long): + return inode_raw + + if type(inode_raw) is float: + return int(inode_raw) + + try: + return int(inode_raw) + except ValueError: + # Let's do one more attempt. 
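
GetInodeValue's fallback below handles compound inode strings by keeping the part before the first dash. A usage sketch of the expected behavior; the '15-144-1' form is an illustrative guess at the kind of TSK-style value the fallback targets:

    from plaso.lib import utils

    assert utils.GetInodeValue(15) == 15
    assert utils.GetInodeValue(15.0) == 15
    assert utils.GetInodeValue('15') == 15
    assert utils.GetInodeValue('15-144-1') == 15  # keeps the part before '-'
    assert utils.GetInodeValue('not an inode') == -1
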
+ inode_string, _, _ = str(inode_raw).partition('-') + try: + return int(inode_string) + except ValueError: + return -1 diff --git a/plaso/lib/utils_test.py b/plaso/lib/utils_test.py new file mode 100644 index 0000000..4de0230 --- /dev/null +++ b/plaso/lib/utils_test.py @@ -0,0 +1,48 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the unit tests for the utils library of methods.""" +import unittest + +from plaso.lib import utils + + +class UtilsTestCase(unittest.TestCase): + """The unit test for utils method collection.""" + + def testIsText(self): + """Test the IsText method.""" + bytes_in = 'this is My Weird ASCII and non whatever string.' + self.assertTrue(utils.IsText(bytes_in)) + + bytes_in = u'Plaso Síar Og Raðar Þessu' + self.assertTrue(utils.IsText(bytes_in)) + + bytes_in = '\x01\62LSO\xFF' + self.assertFalse(utils.IsText(bytes_in)) + + bytes_in = 'T\x00h\x00i\x00s\x00\x20\x00' + self.assertTrue(utils.IsText(bytes_in)) + + bytes_in = 'Ascii\x00' + self.assertTrue(utils.IsText(bytes_in)) + + bytes_in = 'Ascii Start then...\x00\x99\x23' + self.assertFalse(utils.IsText(bytes_in)) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/multi_processing/__init__.py b/plaso/multi_processing/__init__.py new file mode 100644 index 0000000..f462564 --- /dev/null +++ b/plaso/multi_processing/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/multi_processing/foreman.py b/plaso/multi_processing/foreman.py new file mode 100644 index 0000000..21c1718 --- /dev/null +++ b/plaso/multi_processing/foreman.py @@ -0,0 +1,332 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a foreman class for monitoring workers.""" + +import collections +import logging + +from plaso.multi_processing import process_info + + +class Foreman(object): + """A foreman class that monitors workers. + + The Foreman is responsible for monitoring worker processes + and give back status information. The status information contains + among other things: + + Number of events extracted from each worker. + + Path of the current file the worker is processing. + + Indications whether the worker is alive or not. + + Memory consumption of the worker. + + This information is gathered using both RPC calls to the worker + itself as well as data provided by the psutil library. + + In the future the Foreman should be able to actively monitor + the health of the processes and terminate and restart processes + that are stuck. + """ + + PROCESS_LABEL = collections.namedtuple('process_label', 'label pid process') + + def __init__(self, show_memory_usage=False): + """Initialize the foreman process. + + Args: + show_memory_usage: Optional boolean value to indicate memory information + should be included in logging. The default is false. + """ + self._last_status_dict = {} + self._process_information = process_info.ProcessInfo() + self._process_labels = [] + self._processing_done = False + self._show_memory_usage = show_memory_usage + + @property + def labels(self): + """Return a list of all currently watched labels.""" + return self._process_labels + + @property + def number_of_processes_in_watch_list(self): + """Return the number of processes in the watch list.""" + return len(self._process_labels) + + def CheckStatus(self, label=None): + """Checks status of either a single process or all from the watch list. + + Args: + label: A process label (instance of PROCESS_LABEL), if not provided + all processes from the watch list are checked. Defaults to None. + """ + if label is not None: + self._CheckStatus(label) + return + + for process_label in self._process_labels: + self._CheckStatus(process_label) + + def GetLabel(self, name=None, pid=None): + """Return a label if found using either name or PID value. + + Args: + name: String value that should match an already existing label. + pid: A process ID (PID) value for a process that is monitored. + + Returns: + A label (instance of PROCESS_LABEL) if found. If neither name + nor pid value is given or the process does not exist a None value + will be returned. + """ + if name is not None: + for process_label in self._process_labels: + if process_label.label == name: + return process_label + + if pid is not None: + for process_label in self._process_labels: + if process_label.pid == pid: + return process_label + + def MonitorWorker(self, label=None, pid=None, name=None): + """Starts monitoring a worker by adding it to the monitor list. + + This function requires either a label to be set or a PID and a process + name. If the label is empty or if both a PID and a name is not provided + the function does nothing, as in no process is added to the list of + workers to monitor (and no indication). 
+ + Args: + label: A process label (instance of PROCESS_LABEL), if not provided + then a pid and a name is required. Defaults to None (if None + then both a pid and name have to be provided). + pid: The process ID (PID) of the worker that should be added to the + monitor list. This is only required if label is not provided. + Defaults to None. This is only used if label is set to None, in + which case it has to be set. + name: The name of the worker process, only required if label is not + provided. Defaults to None, only used if label is set to None, + in which case it has to be set. + """ + if label is None: + if pid is None or name is None: + return + label = self.PROCESS_LABEL(name, pid, process_info.ProcessInfo(pid=pid)) + + if not label: + return + + if label not in self._process_labels: + self._process_labels.append(label) + + def StopMonitoringWorker(self, label=None, pid=None, name=None): + """Stop monitoring a particular worker and remove it from monitor list. + + The purpose of this function is to remove a worker from the list of + monitored workers. In order to do that the function requires either a + label or a pid and a name. + + Args: + label: A process label (instance of PROCESS_LABEL). Defaults to None, and + so then a pid and name are required. + pid: The process ID (PID) of the worker that should no longer be + monitored. This is only required if label is not provided and + defaults to None. + name: The name of the worker process, defaults to None and is only + required if label is not set. + """ + if label is None: + if pid is None or name is None: + return + label = self.PROCESS_LABEL( + name, pid, process_info.ProcessInfo(pid=pid)) + + if label not in self._process_labels: + return + + index = self._process_labels.index(label) + del self._process_labels[index] + logging.info( + u'{0:s} [{1:d}] has been removed from foreman monitoring.'.format( + label.label, label.pid)) + + def SignalEndOfProcessing(self): + """Indicate that processing is done.""" + self._processing_done = True + # TODO: Reconsider this as an info signal. Should this not be moved to + # a debug one? + logging.info( + u'Foreman received a signal indicating that processing is completed.') + + # This function may be called via RPC functions that expects a value to be + # returned. + return True + + def TerminateProcess(self, label=None, pid=None, name=None): + """Terminate a process, even if it is not in the watch list. + + Args: + label: A process label (instance of PROCESS_LABEL), if not provided + then a pid and a name is required. It defaults to None, in which + case you need to provide a pid and/or a name. + pid: The process ID (PID) of the worker. This is only required if label + is not provided and defaults to None. + name: The name of the worker process, only required if label is not + provided and defaults to None. + """ + if label is not None: + self._TerminateProcess(label) + return + + if pid is not None: + for process_label in self._process_labels: + if process_label.pid == pid: + self._TerminateProcess(process_label) + return + + if name is not None: + for process_label in self._process_labels: + if process_label.label == name: + self._TerminateProcess(process_label) + return + + # If we reach here the process is not in our watch list. 
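
Putting the foreman API together, a minimal monitoring sequence might look like the sketch below. The pid values and worker names are hypothetical, and ProcessInfo requires the processes to actually exist, so this is an illustration of the call pattern rather than runnable-as-is code.

    from plaso.multi_processing import foreman
    from plaso.multi_processing import process_info

    foreman_object = foreman.Foreman(show_memory_usage=True)

    # Register a worker either via an explicit label...
    label = foreman_object.PROCESS_LABEL(
        'extraction_worker_1', 4242, process_info.ProcessInfo(pid=4242))
    foreman_object.MonitorWorker(label=label)

    # ...or by pid and name, letting MonitorWorker build the label.
    foreman_object.MonitorWorker(pid=4243, name='extraction_worker_2')

    foreman_object.CheckStatus()  # polls every process on the watch list
    foreman_object.StopMonitoringWorker(label=label)
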
+    if pid is not None and name is not None:
+      process_label = self.PROCESS_LABEL(
+          name, pid, process_info.ProcessInfo(pid=pid))
+      self._TerminateProcess(process_label)
+
+  def _CheckStatus(self, label):
+    """Check status for a single process from the watch list.
+
+    This function takes a single label, which describes a worker process,
+    checks whether that process is alive and calls the appropriate functions
+    to log the information extracted from the worker. If the process is no
+    longer alive and processing has been marked as done, the worker is
+    removed from the list of monitored workers. This function is also
+    responsible for killing or terminating a process that is alive and
+    hanging, or not alive while it should be alive.
+
+    In the future this function will also be responsible for restarting
+    a worker, or signalling the engine that it needs to spin up a new worker
+    in the case of a worker dying or being in an effective zombie state.
+
+    Args:
+      label: A process label (instance of PROCESS_LABEL).
+    """
+    if label not in self._process_labels:
+      return
+
+    process = label.process
+
+    if process.IsAlive():
+      status_dict = process.GetProcessStatus()
+      if not status_dict and not self._processing_done:
+        logging.warning((
+            u'Unable to connect to RPC socket to: {0:s} at '
+            u'http://localhost:{1:d}').format(label.label, label.pid))
+
+      if status_dict:
+        self._last_status_dict[label.pid] = status_dict
+        if status_dict.get('is_running', False):
+          self._LogWorkerInformation(label, status_dict)
+          if self._show_memory_usage:
+            self._LogMemoryUsage(label)
+          return
+        else:
+          logging.info(
+              u'Process {0:s} [{1:d}] has completed its processing. Total of '
+              u'{2:d} events extracted.'.format(
+                  label.label, label.pid, status_dict.get('counter', 0)))
+
+    else:
+      logging.info(u'Process {0:s} [{1:d}] is not alive.'.format(
+          label.label, label.pid))
+
+    # Check if this process should be alive.
+    if self._processing_done:
+      # This process exited properly and should have. Let's remove it from our
+      # list of labels.
+      self.StopMonitoringWorker(label=label)
+      return
+
+    # We need to terminate the process.
+    # TODO: Add a function to start a new instance of a worker instead of
+    # just removing and killing it.
+    logging.error(
+        u'Process {0:s} [{1:d}] is not functioning when it should be. '
+        u'Terminating it and removing from list.'.format(
+            label.label, label.pid))
+    self._TerminateProcess(label)
+
+  def _LogMemoryUsage(self, label):
+    """Logs memory information gathered from a process.
+
+    This function will take a label and call the logging infrastructure to
+    log information about the process's memory usage.
+
+    Args:
+      label: A process label (instance of PROCESS_LABEL).
+    """
+    mem_info = label.process.GetMemoryInformation()
+    logging.info((
+        u'{0:s} - RSS: {1:d}, VMS: {2:d}, Shared: {3:d}, Text: {4:d}, lib: '
+        u'{5:d}, data: {6:d}, dirty: {7:d}, Memory Percent: {8:0.2f}%').format(
+            label.label, mem_info.rss, mem_info.vms, mem_info.shared,
+            mem_info.text, mem_info.lib, mem_info.data, mem_info.dirty,
+            mem_info.percent * 100))
+
+  def _LogWorkerInformation(self, label, status=None):
+    """Log information gathered from the worker.
+
+    Args:
+      label: A process label (instance of PROCESS_LABEL).
+      status: Optional dictionary containing process status information,
+              as returned by the worker's GetProcessStatus call.
+              The default is None.
+ """ + if status: + logging.info(( + u'{0:s} [{1:d}] - Events Extracted: {2:d} - File ({3:s}) - Running: ' + u'{4!s} <{5:s}>').format( + label.label, label.pid, status.get('counter', -1), + status.get('current_file', u''), status.get('is_running', False), + unicode(label.process.status))) + + def _TerminateProcess(self, label): + """Terminate a process given a process label. + + Attempts to terminate a process and if successful + removes the label from the watch list. + + Args: + label: A process label (instance of PROCESS_LABEL). + """ + if label is None: + return + + label.process.TerminateProcess() + + # Double check the process is dead. + if label.process.IsAlive(): + logging.warning(u'Process {0:s} [{1:d}] is still alive.'.format( + label.label, label.pid)) + elif label.process.status != 'exited': + logging.warning(u'Process {0:s} [{1:d}] may still be alive.'.format( + label.label, label.pid)) + else: + logging.info(u'Process: {0:s} [{1:d}] has been terminated.'.format( + label.label, label.pid)) + self.StopMonitoringWorker(label) diff --git a/plaso/multi_processing/multi_process.py b/plaso/multi_processing/multi_process.py new file mode 100644 index 0000000..709e9db --- /dev/null +++ b/plaso/multi_processing/multi_process.py @@ -0,0 +1,700 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The multi-process processing engine.""" + +import ctypes +import logging +import multiprocessing +import os +import signal +import sys +import threading + +from plaso.engine import collector +from plaso.engine import engine +from plaso.engine import queue +from plaso.engine import worker +from plaso.lib import errors +from plaso.multi_processing import foreman +from plaso.multi_processing import rpc_proxy +from plaso.parsers import context as parsers_context + + +def SigKill(pid): + """Convenience function to issue a SIGKILL or equivalent. + + Args: + pid: The process identifier. + """ + if sys.platform.startswith('win'): + process_terminate = 1 + handle = ctypes.windll.kernel32.OpenProcess( + process_terminate, False, pid) + ctypes.windll.kernel32.TerminateProcess(handle, -1) + ctypes.windll.kernel32.CloseHandle(handle) + + else: + try: + os.kill(pid, signal.SIGKILL) + except OSError as exception: + logging.error( + u'Unable to kill process {0:d} with error: {1:s}'.format( + pid, exception)) + + +class MultiProcessEngine(engine.BaseEngine): + """Class that defines the multi-process engine.""" + + _WORKER_PROCESSES_MINIMUM = 2 + _WORKER_PROCESSES_MAXIMUM = 15 + + def __init__(self, maximum_number_of_queued_items=0): + """Initialize the multi-process engine object. + + Args: + maximum_number_of_queued_items: The maximum number of queued items. + The default is 0, which represents + no limit. 
+ """ + collection_queue = MultiProcessingQueue( + maximum_number_of_queued_items=maximum_number_of_queued_items) + storage_queue = MultiProcessingQueue( + maximum_number_of_queued_items=maximum_number_of_queued_items) + parse_error_queue = MultiProcessingQueue( + maximum_number_of_queued_items=maximum_number_of_queued_items) + + super(MultiProcessEngine, self).__init__( + collection_queue, storage_queue, parse_error_queue) + + self._collection_process = None + self._foreman_object = None + self._storage_process = None + + # TODO: turn into a process pool. + self._worker_processes = {} + + # Attributes for RPC proxy server thread. + self._proxy_thread = None + self._rpc_proxy_server = None + self._rpc_port_number = 0 + + def _StartRPCProxyServerThread(self, foreman_object): + """Starts the RPC proxy server thread. + + Args: + foreman_object: a foreman object (instance of Foreman). + """ + if self._rpc_proxy_server or self._proxy_thread: + return + + self._rpc_proxy_server = rpc_proxy.StandardRpcProxyServer(os.getpid()) + + try: + self._rpc_proxy_server.Open() + self._rpc_proxy_server.RegisterFunction( + 'signal_end_of_collection', foreman_object.SignalEndOfProcessing) + + self._proxy_thread = threading.Thread( + name='rpc_proxy', target=self._rpc_proxy_server.StartProxy) + self._proxy_thread.start() + + self._rpc_port_number = self._rpc_proxy_server.listening_port + + except errors.ProxyFailedToStart as exception: + logging.error(( + u'Unable to setup a RPC server for the engine with error ' + u'{0:s}').format(exception)) + + def _StopRPCProxyServerThread(self): + """Stops the RPC proxy server thread.""" + if not self._rpc_proxy_server or not self._proxy_thread: + return + + # Close the proxy, free up resources so we can shut down the thread. + self._rpc_proxy_server.Close() + + if self._proxy_thread.isAlive(): + self._proxy_thread.join() + + self._proxy_thread = None + self._rpc_proxy_server = None + self._rpc_port_number = 0 + + def CreateCollector( + self, include_directory_stat, vss_stores=None, filter_find_specs=None, + resolver_context=None): + """Creates a collector object. + + The collector discovers all the files that need to be processed by + the workers. Once a file is discovered it is added to the process queue + as a path specification (instance of dfvfs.PathSpec). + + Args: + include_directory_stat: Boolean value to indicate whether directory + stat information should be collected. + vss_stores: Optional list of VSS stores to include in the collection, + where 1 represents the first store. Set to None if no + VSS stores should be processed. The default is None. + filter_find_specs: Optional list of filter find specifications (instances + of dfvfs.FindSpec). The default is None. + resolver_context: Optional resolver context (instance of dfvfs.Context). + The default is None. Note that every thread or process + must have its own resolver context. + + Returns: + A collector object (instance of Collector). + + Raises: + RuntimeError: if source path specification is not set. 
+ """ + if not self._source_path_spec: + raise RuntimeError(u'Missing source.') + + collector_object = collector.Collector( + self._collection_queue, self._source, self._source_path_spec, + resolver_context=resolver_context) + + collector_object.SetCollectDirectoryMetadata(include_directory_stat) + + if vss_stores: + collector_object.SetVssInformation(vss_stores) + + if filter_find_specs: + collector_object.SetFilter(filter_find_specs) + + return collector_object + + def CreateExtractionWorker(self, worker_number): + """Creates an extraction worker object. + + Args: + worker_number: A number that identifies the worker. + + Returns: + An extraction worker (instance of worker.ExtractionWorker). + """ + parser_context = parsers_context.ParserContext( + self._event_queue_producer, self._parse_error_queue_producer, + self.knowledge_base) + + extraction_worker = worker.BaseEventExtractionWorker( + worker_number, self._collection_queue, self._event_queue_producer, + self._parse_error_queue_producer, parser_context) + + extraction_worker.SetEnableDebugOutput(self._enable_debug_output) + + # TODO: move profiler in separate object. + extraction_worker.SetEnableProfiling( + self._enable_profiling, + profiling_sample_rate=self._profiling_sample_rate) + + if self._open_files: + extraction_worker.SetOpenFiles(self._open_files) + + if self._filter_object: + extraction_worker.SetFilterObject(self._filter_object) + + if self._mount_path: + extraction_worker.SetMountPath(self._mount_path) + + if self._text_prepend: + extraction_worker.SetTextPrepend(self._text_prepend) + + return extraction_worker + + def ProcessSource( + self, collector_object, storage_writer, parser_filter_string=None, + number_of_extraction_workers=0, have_collection_process=True, + have_foreman_process=True, show_memory_usage=False): + """Processes the source and extracts event objects. + + Args: + collector_object: A collector object (instance of Collector). + storage_writer: A storage writer object (instance of BaseStorageWriter). + parser_filter_string: Optional parser filter string. The default is None. + number_of_extraction_workers: Optional number of extraction worker + processes. The default is 0 which means + the function will determine the suitable + number. + have_collection_process: Optional boolean value to indidate a separate + collection process should be run. The default + is true. + have_foreman_process: Optional boolean value to indidate a separate + foreman process should be run to make sure the + workers are extracting event objects. The default + is true. + show_memory_usage: Optional boolean value to indicate memory information + should be included in logging. The default is false. + """ + if number_of_extraction_workers < 1: + # One worker for each "available" CPU (minus other processes). + # The number here is derived from the fact that the engine starts up: + # + A collector process (optional). + # + A storage process. + # + # If we want to utilize all CPUs on the system we therefore need to start + # up workers that amounts to the total number of CPUs - the other + # processes. 
+      cpu_count = multiprocessing.cpu_count() - 2
+      if have_collection_process:
+        cpu_count -= 1
+
+      if cpu_count <= self._WORKER_PROCESSES_MINIMUM:
+        cpu_count = self._WORKER_PROCESSES_MINIMUM
+
+      elif cpu_count >= self._WORKER_PROCESSES_MAXIMUM:
+        cpu_count = self._WORKER_PROCESSES_MAXIMUM
+
+      number_of_extraction_workers = cpu_count
+
+    if have_foreman_process:
+      self._foreman_object = foreman.Foreman(
+          show_memory_usage=show_memory_usage)
+      self._StartRPCProxyServerThread(self._foreman_object)
+
+    self._storage_process = MultiProcessStorageProcess(
+        storage_writer, name='StorageProcess')
+    self._storage_process.start()
+
+    if have_collection_process:
+      self._collection_process = MultiProcessCollectionProcess(
+          collector_object, self._rpc_port_number, name='CollectionProcess')
+      self._collection_process.start()
+
+    logging.info(u'Starting extraction worker processes.')
+    for worker_number in range(number_of_extraction_workers):
+      extraction_worker = self.CreateExtractionWorker(worker_number)
+
+      worker_name = u'Worker_{0:d}'.format(worker_number)
+
+      # TODO: Test to see if a process pool can be a better choice.
+      worker_process = MultiProcessEventExtractionWorkerProcess(
+          extraction_worker, parser_filter_string, name=worker_name)
+      worker_process.start()
+
+      if self._foreman_object:
+        self._foreman_object.MonitorWorker(
+            pid=worker_process.pid, name=worker_name)
+
+      self._worker_processes[worker_name] = worker_process
+
+    logging.debug(u'Collection started.')
+    if not self._collection_process:
+      collector_object.Collect()
+
+    else:
+      while self._collection_process.is_alive():
+        self._collection_process.join(timeout=10)
+
+        # Check the worker status regularly while collection is still ongoing.
+        if self._foreman_object:
+          self._foreman_object.CheckStatus()
+
+        # TODO: We get a signal when collection is done, which might happen
+        # before the collection thread joins. Look at the option of speeding
+        # up the process of the collector stopping by potentially killing it.
+
+    logging.info(u'Collection stopped.')
+
+    self._StopProcessing()
+
+  def _StopProcessing(self):
+    """Stops the foreman and worker processes."""
+    if self._foreman_object:
+      self._foreman_object.SignalEndOfProcessing()
+      self._StopRPCProxyServerThread()
+
+    # Run through the running workers, one by one.
+    # This will go through a list of all active worker processes and check
+    # their status. If a worker has completed it will be removed from the
+    # list. The process will not wait longer than five seconds for each
+    # worker to complete; if more time passes it will simply check the
+    # worker's status and move on. That ensures that every worker process is
+    # monitored and its status updated.
+    while self._worker_processes:
+      # Note that self._worker_processes is altered in this loop hence we need
+      # it to be sorted.
+      for process_name, process_obj in sorted(self._worker_processes.items()):
+        if self._foreman_object:
+          worker_label = self._foreman_object.GetLabel(
+              name=process_name, pid=process_obj.pid)
+        else:
+          worker_label = None
+
+        if not worker_label:
+          if process_obj.is_alive():
+            logging.info((
+                u'Process {0:s} [{1:d}] is not monitored by the foreman. Most '
+                u'likely due to a worker having completed its processing '
+                u'while waiting for another worker to complete.').format(
+                    process_name, process_obj.pid))
+          logging.info(
+              u'Waiting for worker {0:s} to complete.'.format(process_name))
+          process_obj.join()
+          logging.info(u'Worker: {0:s} [{1:d}] has completed.'.format(
+              process_name, process_obj.pid))
+
+          del self._worker_processes[process_name]
+          continue
+
+        if process_obj.is_alive():
+          # Check status of worker.
+          self._foreman_object.CheckStatus(label=worker_label)
+          process_obj.join(timeout=5)
+
+        # Note that we explicitly must test against exitcode 0 here since
+        # process.exitcode will be None if there is no exitcode.
+        elif process_obj.exitcode != 0:
+          logging.warning((
+              u'Worker process: {0:s} already exited with code: '
+              u'{1:d}.').format(process_name, process_obj.exitcode))
+          process_obj.terminate()
+          self._foreman_object.TerminateProcess(label=worker_label)
+
+        else:
+          # Process is no longer alive, no need to monitor.
+          self._foreman_object.StopMonitoringWorker(label=worker_label)
+          # Remove it from our list of active workers.
+          del self._worker_processes[process_name]
+
+    if self._foreman_object:
+      self._foreman_object = None
+
+    logging.info(u'Extraction workers stopped.')
+    self._event_queue_producer.SignalEndOfInput()
+
+    self._storage_process.join()
+    logging.info(u'Storage writer stopped.')
+
+  def _AbortNormal(self, timeout=None):
+    """Abort in a normal way.
+
+    Args:
+      timeout: The process join timeout. The default is None meaning
+               no timeout.
+    """
+    if self._collection_process:
+      logging.warning(u'Signaling collection process to abort.')
+      self._collection_process.SignalAbort()
+
+    if self._worker_processes:
+      logging.warning(u'Signaling worker processes to abort.')
+      for _, worker_process in self._worker_processes.iteritems():
+        worker_process.SignalAbort()
+
+    logging.warning(u'Signaling storage process to abort.')
+    self._event_queue_producer.SignalEndOfInput()
+    self._storage_process.SignalAbort()
+
+    if self._collection_process:
+      logging.warning(u'Waiting for collection process: {0:d}.'.format(
+          self._collection_process.pid))
+      # TODO: it looks like xmlrpclib.ServerProxy is not allowing the
+      # collection process to close.
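+      # (Because of that the join below can time out with the process still
+      # alive; SignalAbort() follows up with _AbortTerminate() in any case.)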
+      self._collection_process.join(timeout=timeout)
+
+    if self._worker_processes:
+      for worker_name, worker_process in self._worker_processes.iteritems():
+        logging.warning(u'Waiting for worker: {0:s} process: {1:d}'.format(
+            worker_name, worker_process.pid))
+        worker_process.join(timeout=timeout)
+
+    if self._storage_process:
+      logging.warning(u'Waiting for storage process: {0:d}.'.format(
+          self._storage_process.pid))
+      self._storage_process.join(timeout=timeout)
+
+  def _AbortTerminate(self):
+    """Abort processing by sending SIGTERM or equivalent."""
+    if self._collection_process and self._collection_process.is_alive():
+      logging.warning(u'Terminating collection process: {0:d}.'.format(
+          self._collection_process.pid))
+      self._collection_process.terminate()
+
+    if self._worker_processes:
+      for worker_name, worker_process in self._worker_processes.iteritems():
+        if worker_process.is_alive():
+          logging.warning(u'Terminating worker: {0:s} process: {1:d}'.format(
+              worker_name, worker_process.pid))
+          worker_process.terminate()
+
+    if self._storage_process and self._storage_process.is_alive():
+      logging.warning(u'Terminating storage process: {0:d}.'.format(
+          self._storage_process.pid))
+      self._storage_process.terminate()
+
+  def _AbortKill(self):
+    """Abort processing by sending SIGKILL or equivalent."""
+    if self._collection_process and self._collection_process.is_alive():
+      logging.warning(u'Killing collection process: {0:d}.'.format(
+          self._collection_process.pid))
+      SigKill(self._collection_process.pid)
+
+    if self._worker_processes:
+      for worker_name, worker_process in self._worker_processes.iteritems():
+        if worker_process.is_alive():
+          logging.warning(u'Killing worker: {0:s} process: {1:d}'.format(
+              worker_name, worker_process.pid))
+          SigKill(worker_process.pid)
+
+    if self._storage_process and self._storage_process.is_alive():
+      logging.warning(u'Killing storage process: {0:d}.'.format(
+          self._storage_process.pid))
+      SigKill(self._storage_process.pid)
+
+  def SignalAbort(self):
+    """Signals the engine to abort."""
+    super(MultiProcessEngine, self).SignalAbort()
+
+    try:
+      self._AbortNormal(timeout=2)
+      self._AbortTerminate()
+    except KeyboardInterrupt:
+      self._AbortKill()
+
+    # TODO: remove the need for this.
+    # Sometimes the main process will be unresponsive.
+    SigKill(os.getpid())
+
+
+class MultiProcessCollectionProcess(multiprocessing.Process):
+  """Class that defines a multi-processing collection process."""
+
+  def __init__(self, collector_object, rpc_port_number, **kwargs):
+    """Initializes the process object.
+
+    Args:
+      collector_object: A collector object (instance of Collector).
+      rpc_port_number: An integer value containing the RPC end point port
+                       number or 0 if not set.
+    """
+    super(MultiProcessCollectionProcess, self).__init__(**kwargs)
+    self._collector_object = collector_object
+    self._rpc_port_number = rpc_port_number
+
+  # This method is part of the multiprocessing.Process interface, hence its
+  # name does not follow the style guide.
+  def run(self):
+    """The main loop."""
+    # Prevent the KeyboardInterrupt from being raised inside the child
+    # process. This prevents the collection process from generating a
+    # traceback when interrupted.
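+    # (Sketch of the mechanism: signal.SIG_IGN installs the "ignore"
+    # disposition for SIGINT in this child, so a Ctrl-C delivered to the
+    # process group is discarded here and only the parent decides how to
+    # shut down.)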
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+    logging.debug(u'Collection process: {0!s} started'.format(self._name))
+
+    rpc_proxy_client = None
+    if self._rpc_port_number:
+      try:
+        rpc_proxy_client = rpc_proxy.StandardRpcProxyClient(
+            self._rpc_port_number)
+        rpc_proxy_client.Open()
+
+      except errors.ProxyFailedToStart as exception:
+        logging.error((
+            u'Unable to set up an RPC client for the collector process with '
+            u'error: {0:s}').format(exception))
+
+    self._collector_object.Collect()
+
+    logging.debug(u'Collection process: {0!s} stopped'.format(self._name))
+    if rpc_proxy_client:
+      _ = rpc_proxy_client.GetData(u'signal_end_of_collection')
+
+  def SignalAbort(self):
+    """Signals the process to abort."""
+    self._collector_object.SignalAbort()
+
+
+class MultiProcessEventExtractionWorkerProcess(multiprocessing.Process):
+  """Class that defines a multi-processing event extraction worker process."""
+
+  def __init__(self, extraction_worker, parser_filter_string, **kwargs):
+    """Initializes the process object.
+
+    Args:
+      extraction_worker: The extraction worker object (instance of
+                         MultiProcessEventExtractionWorker).
+      parser_filter_string: Optional parser filter string. The default is None.
+    """
+    super(MultiProcessEventExtractionWorkerProcess, self).__init__(**kwargs)
+    self._extraction_worker = extraction_worker
+
+    # TODO: clean this up with the implementation of a task based
+    # multi-processing approach.
+    self._parser_filter_string = parser_filter_string
+
+    # Attributes for RPC proxy server thread.
+    self._proxy_thread = None
+    self._rpc_proxy_server = None
+
+  def _StartRPCProxyServerThread(self):
+    """Starts the RPC proxy server thread."""
+    if self._rpc_proxy_server or self._proxy_thread:
+      return
+
+    # Set up a simple XML RPC server for the worker for status indications.
+    # Since we don't know the worker's PID for now we'll set the initial port
+    # number to zero and then adjust it later.
+    self._rpc_proxy_server = rpc_proxy.StandardRpcProxyServer()
+
+    try:
+      self._rpc_proxy_server.SetListeningPort(os.getpid())
+      self._rpc_proxy_server.Open()
+      self._rpc_proxy_server.RegisterFunction(
+          'status', self._extraction_worker.GetStatus)
+
+      self._proxy_thread = threading.Thread(
+          name='rpc_proxy', target=self._rpc_proxy_server.StartProxy)
+      self._proxy_thread.start()
+
+    except errors.ProxyFailedToStart as exception:
+      logging.error((
+          u'Unable to set up an RPC server for the worker: {0:d} [PID {1:d}] '
+          u'with error: {2:s}').format(
+              self._identifier, os.getpid(), exception))
+
+  def _StopRPCProxyServerThread(self):
+    """Stops the RPC proxy server thread."""
+    if not self._rpc_proxy_server or not self._proxy_thread:
+      return
+
+    # Close the proxy, free up resources so we can shut down the thread.
+    self._rpc_proxy_server.Close()
+
+    if self._proxy_thread.isAlive():
+      self._proxy_thread.join()
+
+    self._rpc_proxy_server = None
+    self._proxy_thread = None
+
+  # This method is part of the multiprocessing.Process interface, hence its
+  # name does not follow the style guide.
+  def run(self):
+    """The main loop."""
+    # Prevent the KeyboardInterrupt from being raised inside the worker
+    # process. This prevents the worker process from generating a traceback
+    # when interrupted.
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+    # We need to initialize the parser objects after the process has forked,
+    # otherwise on Windows the "fork" will fail with a PickleError for
+    # Python modules that cannot be pickled.
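+    # (Likely rationale, stated as an assumption: on Windows multiprocessing
+    # spawns the child by pickling the Process object, so unpicklable parser
+    # state, such as open file objects, must be created after the child has
+    # started.)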
+    self._extraction_worker.InitalizeParserObjects(
+        parser_filter_string=self._parser_filter_string)
+
+    logging.debug(u'Worker process: {0!s} started'.format(self._name))
+    self._StartRPCProxyServerThread()
+
+    self._extraction_worker.Run()
+
+    logging.debug(u'Worker process: {0!s} stopped'.format(self._name))
+    self._StopRPCProxyServerThread()
+
+  def SignalAbort(self):
+    """Signals the process to abort."""
+    self._extraction_worker.SignalAbort()
+
+
+class MultiProcessStorageProcess(multiprocessing.Process):
+  """Class that defines a multi-processing storage process."""
+
+  def __init__(self, storage_writer, **kwargs):
+    """Initializes the process object.
+
+    Args:
+      storage_writer: A storage writer object (instance of BaseStorageWriter).
+    """
+    super(MultiProcessStorageProcess, self).__init__(**kwargs)
+    self._storage_writer = storage_writer
+
+  # This method is part of the multiprocessing.Process interface, hence its
+  # name does not follow the style guide.
+  def run(self):
+    """The main loop."""
+    # Prevent the KeyboardInterrupt from being raised inside the storage
+    # process. This prevents the storage process from generating a traceback
+    # when interrupted.
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+    logging.debug(u'Storage process: {0!s} started'.format(self._name))
+    self._storage_writer.WriteEventObjects()
+    logging.debug(u'Storage process: {0!s} stopped'.format(self._name))
+
+  def SignalAbort(self):
+    """Signals the process to abort."""
+    self._storage_writer.SignalAbort()
+
+
+class MultiProcessingQueue(queue.Queue):
+  """Class that defines the multi-processing queue."""
+
+  def __init__(self, maximum_number_of_queued_items=0):
+    """Initializes the multi-processing queue object.
+
+    Args:
+      maximum_number_of_queued_items: The maximum number of queued items.
+                                      The default is 0, which represents
+                                      no limit.
+    """
+    super(MultiProcessingQueue, self).__init__()
+
+    # maxsize contains the maximum number of items allowed to be queued,
+    # where 0 represents unlimited.
+    # We need to check that we aren't asking for a bigger queue than the
+    # platform supports, which requires access to this protected member.
+    # pylint: disable=protected-access
+    queue_max_length = multiprocessing._multiprocessing.SemLock.SEM_VALUE_MAX
+    # pylint: enable=protected-access
+    if maximum_number_of_queued_items > queue_max_length:
+      logging.warning((
+          u'Maximum queue size requested ({0:d}) is larger than the system '
+          u'supported maximum size. Setting queue size to maximum supported '
+          u'size: {1:d}.').format(
+              maximum_number_of_queued_items, queue_max_length))
+      maximum_number_of_queued_items = queue_max_length
+    self._queue = multiprocessing.Queue(
+        maxsize=maximum_number_of_queued_items)
+
+  def __len__(self):
+    """Returns the estimated current number of items in the queue."""
+    size = 0
+    try:
+      size = self._queue.qsize()
+    except NotImplementedError:
+      logging.warning((
+          u'Returning queue length does not work on Mac OS X because of '
+          u'broken sem_getvalue()'))
+      raise
+
+    return size
+
+  def IsEmpty(self):
+    """Determines if the queue is empty."""
+    return self._queue.empty()
+
+  def PushItem(self, item):
+    """Pushes an item onto the queue."""
+    self._queue.put(item)
+
+  def PopItem(self):
+    """Pops an item off the queue."""
+    try:
+      return self._queue.get()
+    except KeyboardInterrupt:
+      raise errors.QueueEmpty
diff --git a/plaso/multi_processing/multi_process_test.py b/plaso/multi_processing/multi_process_test.py
new file mode 100644
index 0000000..0f5c8cf
--- /dev/null
+++ b/plaso/multi_processing/multi_process_test.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests the multi-process processing engine."""
+
+import unittest
+
+from plaso.engine import test_lib
+from plaso.multi_processing import multi_process
+
+
+class MultiProcessingQueueTest(unittest.TestCase):
+  """Tests the multi-processing queue."""
+
+  _ITEMS = frozenset(['item1', 'item2', 'item3', 'item4'])
+
+  def testPushPopItem(self):
+    """Tests the PushItem and PopItem functions."""
+    test_queue = multi_process.MultiProcessingQueue()
+
+    for item in self._ITEMS:
+      test_queue.PushItem(item)
+
+    try:
+      self.assertEquals(len(test_queue), len(self._ITEMS))
+    except NotImplementedError:
+      # This fails on Mac OS X because of the broken sem_getvalue().
+      return
+
+    test_queue.SignalEndOfInput()
+    test_queue_consumer = test_lib.TestQueueConsumer(test_queue)
+    test_queue_consumer.ConsumeItems()
+
+    self.assertEquals(test_queue_consumer.number_of_items, len(self._ITEMS))
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/multi_processing/process_info.py b/plaso/multi_processing/process_info.py
new file mode 100644
index 0000000..fc22377
--- /dev/null
+++ b/plaso/multi_processing/process_info.py
@@ -0,0 +1,259 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a class to get process information."""
+
+import collections
+import os
+import SocketServer
+
+import psutil
+
+from plaso.lib import timelib
+from plaso.multi_processing import rpc_proxy
+
+
+class ProcessInfo(object):
+  """Class that provides information about a running process."""
+
+  _MEMORY_INFORMATION = collections.namedtuple(
+      'memory_information', 'rss vms shared text lib data dirty percent')
+
+  def __init__(self, pid=None):
+    """Initialize the process information object.
+
+    Args:
+      pid: Process ID (PID) value of the process to monitor. The default value
+           is None in which case the PID of the calling process will be used.
+
+    Raises:
+      IOError: If the pid does not exist.
+    """
+    if pid is None:
+      self._pid = os.getpid()
+    else:
+      self._pid = pid
+
+    if not psutil.pid_exists(self._pid):
+      raise IOError(u'Unable to read data from pid: {0:d}'.format(self._pid))
+
+    self._command_line = ''
+    self._parent = None
+    self._process = psutil.Process(self._pid)
+    if getattr(psutil, 'version_info', (0, 0, 0)) < (2, 0, 0):
+      self._psutil_pre_v2 = True
+    else:
+      self._psutil_pre_v2 = False
+
+    # TODO: Allow the client proxy object to be determined at run time
+    # instead of using a fixed value as here.
+    self._rpc_client = rpc_proxy.StandardRpcProxyClient(self._pid)
+    self._rpc_client.Open()
+
+  @property
+  def pid(self):
+    """Return the process ID (PID)."""
+    return self._pid
+
+  @property
+  def name(self):
+    """Return the name of the process."""
+    if self._psutil_pre_v2:
+      return self._process.name
+
+    return self._process.name()
+
+  @property
+  def command_line(self):
+    """Return the full command line used to start the process."""
+    if self._command_line:
+      return self._command_line
+
+    try:
+      if self._psutil_pre_v2:
+        command_lines = self._process.cmdline
+      else:
+        command_lines = self._process.cmdline()
+
+      self._command_line = u' '.join(command_lines)
+    except psutil.NoSuchProcess:
+      return
+
+    return self._command_line
+
+  @property
+  def parent(self):
+    """Return a ProcessInfo object for the parent process."""
+    if self._parent is not None:
+      return self._parent
+
+    try:
+      if self._psutil_pre_v2:
+        parent_pid = self._process.parent.pid
+      else:
+        parent = self._process.parent()  # pylint: disable-msg=not-callable
+        parent_pid = parent.pid
+
+      self._parent = ProcessInfo(pid=parent_pid)
+      return self._parent
+    except psutil.NoSuchProcess:
+      return
+
+  @property
+  def open_files(self):
+    """Yield the paths of files the process has open."""
+    try:
+      for open_file in self._process.get_open_files():
+        yield open_file.path
+    except (psutil.AccessDenied, psutil.NoSuchProcess):
+      return
+
+  @property
+  def children(self):
+    """Yield all child processes as ProcessInfo objects."""
+    try:
+      for child in self._process.get_children():
+        yield ProcessInfo(pid=child.pid)
+    except psutil.NoSuchProcess:
+      # We are creating an empty generator here. Neither 'yield' nor
+      # 'return None' on its own provides that behavior, nor does raising
+      # GeneratorExit or StopIteration.
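+      # (The unreachable 'yield' below is what makes Python compile this
+      # function as a generator; the bare 'return' then ends the iteration
+      # immediately, yielding nothing.)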
+      # pylint: disable=unreachable
+      return
+      yield
+
+  @property
+  def number_of_threads(self):
+    """Return the number of threads this process has."""
+    try:
+      return self._process.get_num_threads()
+    except psutil.NoSuchProcess:
+      return 0
+
+  @property
+  def memory_map(self):
+    """Yield memory map objects (instance of mmap)."""
+    try:
+      for memory_map in self._process.get_memory_maps():
+        yield memory_map
+    except psutil.NoSuchProcess:
+      # We are creating an empty generator here. Neither 'yield' nor
+      # 'return None' on its own provides that behavior, nor does raising
+      # GeneratorExit or StopIteration.
+      # pylint: disable=unreachable
+      return
+      yield
+
+  @property
+  def status(self):
+    """Return the process status."""
+    try:
+      if self._psutil_pre_v2:
+        return self._process.status
+      else:
+        return self._process.status()
+    except psutil.NoSuchProcess:
+      return u'exited'
+
+  @property
+  def start_time(self):
+    """Return the start time of the process.
+
+    Returns:
+      An integer representing the number of microseconds since Unix Epoch time
+      in UTC.
+    """
+    if self._psutil_pre_v2:
+      create_time = self._process.create_time
+    else:
+      create_time = self._process.create_time()
+    return timelib.Timestamp.FromPosixTime(int(create_time))
+
+  @property
+  def io_counters(self):
+    """Return the I/O counters for the process."""
+    try:
+      return self._process.get_io_counters()
+    except psutil.NoSuchProcess:
+      return
+
+  @property
+  def cpu_times(self):
+    """Return the CPU times for the process."""
+    try:
+      return self._process.get_cpu_times()
+    except psutil.NoSuchProcess:
+      return
+
+  @property
+  def cpu_percent(self):
+    """Return the percentage of CPU processing this process consumes."""
+    try:
+      return self._process.get_cpu_percent()
+    except psutil.NoSuchProcess:
+      return
+
+  def GetMemoryInformation(self):
+    """Return memory information as a memory_information object.
+
+    Returns:
+      Memory information object (instance of memory_information), a named
+      tuple that contains the following attributes: rss, vms, shared, text,
+      lib, data, dirty, percent.
+    """
+    try:
+      external_information = self._process.get_ext_memory_info()
+    except psutil.NoSuchProcess:
+      return
+
+    percent = self._process.get_memory_percent()
+
+    # Psutil will return different memory information depending on what is
+    # available on that platform.
+    # TODO: Be less strict in what gets returned; make this object more
+    # flexible so that the memory information returned reflects the
+    # information available on the platform.
+    return self._MEMORY_INFORMATION(
+        getattr(external_information, 'rss', 0),
+        getattr(external_information, 'vms', 0),
+        getattr(external_information, 'shared', 0),
+        getattr(external_information, 'text', 0),
+        getattr(external_information, 'lib', 0),
+        getattr(external_information, 'data', 0),
+        getattr(external_information, 'dirty', 0), percent)
+
+  def GetProcessStatus(self):
+    """Attempt to connect to the process via RPC to gather status information."""
+    if self._rpc_client is None:
+      return
+    try:
+      status = self._rpc_client.GetData('status')
+      if isinstance(status, dict):
+        return status
+    except SocketServer.socket.error:
+      return
+
+  def IsAlive(self):
+    """Return a boolean value indicating if the process is alive or not."""
+    return self._process.is_running()
+
+  def TerminateProcess(self):
+    """Terminate the process."""
+    # TODO: Make sure the process has really been terminated.
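+    # (psutil's terminate() sends SIGTERM on POSIX and calls the Windows
+    # TerminateProcess() API on Windows, so a process that ignores SIGTERM
+    # may survive this call and need a follow-up kill.)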
+ if self.IsAlive(): + self._process.terminate() diff --git a/plaso/multi_processing/rpc_proxy.py b/plaso/multi_processing/rpc_proxy.py new file mode 100644 index 0000000..70e54d5 --- /dev/null +++ b/plaso/multi_processing/rpc_proxy.py @@ -0,0 +1,134 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Simple RPC proxy server and client.""" + +import logging +import SimpleXMLRPCServer +import SocketServer +import xmlrpclib + +from xml.parsers import expat + +from plaso.lib import errors +from plaso.lib import proxy + + +class StandardRpcProxyServer(proxy.ProxyServer): + """Class that implements a simple XML RPC based proxy server.""" + + def __init__(self, port=0): + """Initializes the RPC proxy server object. + + Args: + port: The port number the proxy should listen on. Defaults to 0. + """ + super(StandardRpcProxyServer, self).__init__( + proxy.GetProxyPortNumberFromPID(port)) + self._proxy = None + + def Close(self): + """Close the proxy object.""" + if not self._proxy: + return + self._proxy.shutdown() + self._proxy = None + + def Open(self): + """Set up the proxy so that it can be started.""" + try: + self._proxy = SimpleXMLRPCServer.SimpleXMLRPCServer( + ('localhost', self.listening_port), logRequests=False, + allow_none=True) + except SocketServer.socket.error as exception: + raise errors.ProxyFailedToStart( + u'Unable to setup a RPC server for listening to port: {0:d} with ' + u'error: {1:s}'.format(self.listening_port, exception)) + + def SetListeningPort(self, new_port_number): + """Change the port number the proxy listens to.""" + # We don't want to change the port after the proxy has been started. + if self._proxy: + logging.warning( + u'Unable to change proxy ports for an already started proxy.') + return + + self._port_number = proxy.GetProxyPortNumberFromPID(new_port_number) + + def StartProxy(self): + """Start the proxy.""" + if not self._proxy: + raise errors.ProxyFailedToStart(u'Proxy not set up yet.') + self._proxy.serve_forever() + + def RegisterFunction(self, function_name, function): + """Register a function to this RPC proxy. + + Args: + function_name: The name of the proxy function. + function: Callback method to the function providing the requested + information. + """ + if not self._proxy: + raise errors.ProxyFailedToStart(( + u'Unable to register a function for a proxy that has not been set ' + u'up yet.')) + self._proxy.register_function(function, function_name) + + +class StandardRpcProxyClient(proxy.ProxyClient): + """Class that implements a simple XML RPC based proxy client.""" + + def __init__(self, port=0): + """Initializes the RPC proxy client object. + + Args: + port: The port number the proxy should connect to. Defaults to 0. 
+ """ + super(StandardRpcProxyClient, self).__init__( + proxy.GetProxyPortNumberFromPID(port)) + self._proxy = None + + def Open(self): + """Set up the proxy so that it can be started.""" + try: + self._proxy = xmlrpclib.ServerProxy( + u'http://localhost:{0:d}'.format(self._port_number), allow_none=True) + except SocketServer.socket.error: + self._proxy = None + + def GetData(self, call_back_name): + """Return back data from the RPC proxy using a callback method. + + Args: + call_back_name: The name of the callback method that the RPC proxy + supports. + + Returns: + The data returned back by the callback method. + """ + if self._proxy is None: + return + + call_back = getattr(self._proxy, call_back_name, None) + if call_back is None: + return + + try: + return call_back() + except (SocketServer.socket.error, expat.ExpatError): + return diff --git a/plaso/output/__init__.py b/plaso/output/__init__.py new file mode 100644 index 0000000..76ca9b8 --- /dev/null +++ b/plaso/output/__init__.py @@ -0,0 +1,34 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each output plugin.""" + +from plaso.output import dynamic +try: + from plaso.output import elastic +except ImportError: + pass +from plaso.output import json_out +from plaso.output import l2t_csv +from plaso.output import l2t_tln +try: + from plaso.output import mysql_4n6 +except ImportError: + pass +from plaso.output import pstorage +from plaso.output import rawpy +from plaso.output import sqlite_4n6 +from plaso.output import tln diff --git a/plaso/output/dynamic.py b/plaso/output/dynamic.py new file mode 100644 index 0000000..b4a8f66 --- /dev/null +++ b/plaso/output/dynamic.py @@ -0,0 +1,300 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Contains a formatter for a dynamic output module for plaso.""" + +import logging +import re + +from plaso.formatters import manager as formatters_manager +from plaso.lib import errors +from plaso.lib import output +from plaso.lib import timelib +from plaso.output import helper + + +class Dynamic(output.FileLogOutputFormatter): + """Dynamic selection of fields for a separated value output format.""" + + FORMAT_ATTRIBUTE_RE = re.compile('{([^}]+)}') + + # A dict containing mappings between "special" attributes and + # how they should be calculated and presented. + # They should be documented here: + # http://plaso.kiddaland.net/usage/psort/output + SPECIAL_HANDLING = { + 'date': 'ParseDate', + 'datetime': 'ParseDateTime', + 'description': 'ParseMessage', + 'description_short': 'ParseMessageShort', + 'host': 'ParseHostname', + 'hostname': 'ParseHostname', + 'inode': 'ParseInode', + 'macb': 'ParseMacb', + 'message': 'ParseMessage', + 'message_short': 'ParseMessageShort', + 'source': 'ParseSourceShort', + 'sourcetype': 'ParseSource', + 'source_long': 'ParseSource', + 'tag': 'ParseTag', + 'time': 'ParseTime', + 'timezone': 'ParseZone', + 'type': 'ParseTimestampDescription', + 'user': 'ParseUsername', + 'username': 'ParseUsername', + 'zone': 'ParseZone', + } + + def ParseTimestampDescription(self, event_object): + """Return the timestamp description.""" + return getattr(event_object, 'timestamp_desc', '-') + + def ParseTag(self, event_object): + """Return tagging information.""" + tag = getattr(event_object, 'tag', None) + + if not tag: + return u'-' + + return u' '.join(tag.tags) + + def ParseSource(self, event_object): + """Return the source string.""" + # TODO: move this to an output module interface. + event_formatter = formatters_manager.EventFormatterManager.GetFormatter( + event_object) + if not event_formatter: + raise errors.NoFormatterFound( + u'Unable to find no event formatter for: {0:s}.'.format( + event_object.DATA_TYPE)) + + _, source = event_formatter.GetSources(event_object) + return source + + def ParseSourceShort(self, event_object): + """Return the source string.""" + # TODO: move this to an output module interface. + event_formatter = formatters_manager.EventFormatterManager.GetFormatter( + event_object) + if not event_formatter: + raise errors.NoFormatterFound( + u'Unable to find no event formatter for: {0:s}.'.format( + event_object.DATA_TYPE)) + + source, _ = event_formatter.GetSources(event_object) + return source + + def ParseZone(self, _): + """Return a timezone.""" + return self.zone + + def ParseDate(self, event_object): + """Return a date string from a timestamp value.""" + try: + date_use = timelib.Timestamp.CopyToDatetime( + event_object.timestamp, self.zone, raise_error=True) + except OverflowError as exception: + logging.error(( + u'Unable to copy {0:d} into a human readable timestamp with error: ' + u'{1:s}. Event {2:d}:{3:d} triggered the exception.').format( + event_object.timestamp, exception, + getattr(event_object, 'store_number', u''), + getattr(event_object, 'store_index', u''))) + return u'0000-00-00' + return u'{0:04d}-{1:02d}-{2:02d}'.format( + date_use.year, date_use.month, date_use.day) + + def ParseDateTime(self, event_object): + """Return a datetime object from a timestamp, in an ISO format.""" + try: + return timelib.Timestamp.CopyToIsoFormat( + event_object.timestamp, timezone=self.zone, raise_error=True) + + except OverflowError as exception: + logging.error(( + u'Unable to copy {0:d} into a human readable timestamp with error: ' + u'{1:s}. 
Event {2:d}:{3:d} triggered the exception.').format( + event_object.timestamp, exception, + getattr(event_object, 'store_number', u''), + getattr(event_object, 'store_index', u''))) + return u'0000-00-00T00:00:00' + + def ParseTime(self, event_object): + """Return a timestamp string from an integer timestamp value.""" + try: + date_use = timelib.Timestamp.CopyToDatetime( + event_object.timestamp, self.zone, raise_error=True) + except OverflowError as exception: + logging.error(( + u'Unable to copy {0:d} into a human readable timestamp with error: ' + u'{1:s}. Event {2:d}:{3:d} triggered the exception.').format( + event_object.timestamp, exception, + getattr(event_object, 'store_number', u''), + getattr(event_object, 'store_index', u''))) + return u'00:00:00' + return u'{0:02d}:{1:02d}:{2:02d}'.format( + date_use.hour, date_use.minute, date_use.second) + + def ParseHostname(self, event_object): + """Return a hostname.""" + hostname = getattr(event_object, 'hostname', '') + if self.store: + if not hostname: + hostname = self._hostnames.get(event_object.store_number, '-') + + return hostname + + # TODO: move this into a base output class. + def ParseUsername(self, event_object): + """Determines an username based on an event and extracted information. + + Uses the extracted information from the pre processing information and the + event object itself to determine an username. + + Args: + event_object: The event object (instance of EventObject). + + Returns: + An Unicode string containing the username, or - if none found. + """ + username = getattr(event_object, u'username', u'-') + if self.store: + pre_obj = self._preprocesses.get(event_object.store_number) + if pre_obj: + check_user = pre_obj.GetUsernameById(username) + + if check_user != u'-': + username = check_user + + if username == '-' and hasattr(event_object, u'user_sid'): + if not pre_obj: + return getattr(event_object, u'user_sid', u'-') + + return pre_obj.GetUsernameById( + getattr(event_object, u'user_sid', u'-')) + + return username + + def ParseMessage(self, event_object): + """Return the message string from the EventObject. + + Args: + event_object: The event object (EventObject). + + Raises: + errors.NoFormatterFound: If no formatter for that event is found. + """ + # TODO: move this to an output module interface. + event_formatter = formatters_manager.EventFormatterManager.GetFormatter( + event_object) + if not event_formatter: + raise errors.NoFormatterFound( + u'Unable to find no event formatter for: {0:s}.'.format( + event_object.DATA_TYPE)) + + msg, _ = event_formatter.GetMessages(event_object) + return msg + + def ParseMessageShort(self, event_object): + """Return the message string from the EventObject. + + Args: + event_object: The event object (EventObject). + + Raises: + errors.NoFormatterFound: If no formatter for that event is found. + """ + # TODO: move this to an output module interface. 
+    event_formatter = formatters_manager.EventFormatterManager.GetFormatter(
+        event_object)
+    if not event_formatter:
+      raise errors.NoFormatterFound(
+          u'Unable to find an event formatter for: {0:s}.'.format(
+              event_object.DATA_TYPE))
+
+    _, msg_short = event_formatter.GetMessages(event_object)
+    return msg_short
+
+  def ParseInode(self, event_object):
+    """Return an inode number."""
+    inode = getattr(event_object, 'inode', '-')
+    if inode == '-':
+      if hasattr(event_object, 'pathspec') and hasattr(
+          event_object.pathspec, 'image_inode'):
+        inode = event_object.pathspec.image_inode
+
+    return inode
+
+  def ParseMacb(self, event_object):
+    """Return a legacy MACB representation."""
+    return helper.GetLegacy(event_object)
+
+  def Start(self):
+    """Writes a header for the output."""
+    # Start by finding out which fields are to be used.
+    self.fields = []
+
+    if self._filter:
+      self.fields = self._filter.fields
+      self.separator = self._filter.separator
+    else:
+      self.separator = u','
+
+    if not self.fields:
+      # TODO: Evaluate which fields should be included by default.
+      self.fields = [
+          'datetime', 'timestamp_desc', 'source', 'source_long',
+          'message', 'parser', 'display_name', 'tag', 'store_number',
+          'store_index']
+
+    if self.store:
+      self._hostnames = helper.BuildHostDict(self.store)
+      self._preprocesses = {}
+      for info in self.store.GetStorageInformation():
+        if hasattr(info, 'store_range'):
+          for store_number in range(info.store_range[0], info.store_range[1]):
+            self._preprocesses[store_number] = info
+
+    self.filehandle.WriteLine('{0:s}\n'.format(
+        self.separator.join(self.fields)))
+
+  def WriteEvent(self, event_object):
+    """Write a single event."""
+    try:
+      self.EventBody(event_object)
+    except errors.NoFormatterFound:
+      logging.error(u'Unable to output line, no formatter found.')
+      logging.error(event_object)
+
+  def EventBody(self, event_object):
+    """Formats data as "dynamic" CSV and writes it to the filehandle."""
+    row = []
+    for field in self.fields:
+      has_call_back = self.SPECIAL_HANDLING.get(field, None)
+      call_back = None
+      if has_call_back:
+        call_back = getattr(self, has_call_back, None)
+
+      if call_back:
+        row.append(call_back(event_object))
+      else:
+        row.append(getattr(event_object, field, u'-'))
+
+    out_write = u'{0:s}\n'.format(
+        self.separator.join(unicode(x).replace(
+            self.separator, u' ') for x in row))
+    self.filehandle.WriteLine(out_write)
diff --git a/plaso/output/dynamic_test.py b/plaso/output/dynamic_test.py
new file mode 100644
index 0000000..57082fd
--- /dev/null
+++ b/plaso/output/dynamic_test.py
@@ -0,0 +1,131 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for plaso.output.l2t_csv.""" + +import StringIO +import unittest + +from plaso.formatters import interface as formatters_interface +from plaso.lib import event +from plaso.lib import eventdata +from plaso.output import dynamic + + +class TestEvent(event.EventObject): + DATA_TYPE = 'test:dynamic' + + def __init__(self): + super(TestEvent, self).__init__() + self.timestamp = 1340821021000000 + self.timestamp_desc = eventdata.EventTimestamp.CHANGE_TIME + self.hostname = 'ubuntu' + self.filename = 'log/syslog.1' + self.text = ( + u'Reporter PID: 8442 (pam_unix(cron:session): session\n ' + u'closed for user root)') + + +class TestEventFormatter(formatters_interface.EventFormatter): + DATA_TYPE = 'test:dynamic' + FORMAT_STRING = u'{text}' + + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'Syslog' + + +class FakeFilter(object): + """Provide a fake filter, that defines which fields to use.""" + + def __init__(self, fields, separator=u','): + self.fields = fields + self.separator = separator + + +class DynamicTest(unittest.TestCase): + """Test the dynamic output module.""" + + def testHeader(self): + output = StringIO.StringIO() + formatter = dynamic.Dynamic(None, output) + correct_line = ( + 'datetime,timestamp_desc,source,source_long,message,parser,' + 'display_name,tag,store_number,store_index\n') + + formatter.Start() + self.assertEquals(output.getvalue(), correct_line) + + output = StringIO.StringIO() + formatter = dynamic.Dynamic(None, output, filter_use=FakeFilter( + ['date', 'time', 'message', 'hostname', 'filename', 'some_stuff'])) + + correct_line = 'date,time,message,hostname,filename,some_stuff\n' + formatter.Start() + self.assertEquals(output.getvalue(), correct_line) + + output = StringIO.StringIO() + formatter = dynamic.Dynamic(None, output, filter_use=FakeFilter( + ['date', 'time', 'message', 'hostname', 'filename', 'some_stuff'], + '@')) + + correct_line = 'date@time@message@hostname@filename@some_stuff\n' + formatter.Start() + self.assertEquals(output.getvalue(), correct_line) + + def testEventBody(self): + """Test ensures that returned lines returned are fmt CSV as expected.""" + event_object = TestEvent() + output = StringIO.StringIO() + + formatter = dynamic.Dynamic(None, output, filter_use=FakeFilter( + ['date', 'time', 'timezone', 'macb', 'source', 'sourcetype', 'type', + 'user', 'host', 'message_short', 'message', 'filename', + 'inode', 'notes', 'format', 'extra'])) + + formatter.Start() + header = ( + 'date,time,timezone,macb,source,sourcetype,type,user,host,' + 'message_short,message,filename,inode,notes,format,extra\n') + self.assertEquals(output.getvalue(), header) + + formatter.EventBody(event_object) + correct = ( + '2012-06-27,18:17:01,UTC,..C.,LOG,Syslog,Metadata Modification Time,-,' + 'ubuntu,Reporter PID: 8442 (pam_unix(cron:session): session ' + 'closed for user root),Reporter PID: 8442 ' + '(pam_unix(cron:session): session closed for user root),log/syslog.1' + ',-,-,-,-\n') + self.assertEquals(output.getvalue(), header + correct) + + output = StringIO.StringIO() + formatter = dynamic.Dynamic(None, output, filter_use=FakeFilter( + ['datetime', 'nonsense', 'hostname', 'message'])) + + header = 'datetime,nonsense,hostname,message\n' + formatter.Start() + self.assertEquals(output.getvalue(), header) + + correct = ( + '2012-06-27T18:17:01+00:00,-,ubuntu,Reporter PID: 8442' + ' (pam_unix(cron:session): session closed for user root)\n') + + formatter.EventBody(event_object) + self.assertEquals(output.getvalue(), header + correct) + + +if __name__ == '__main__': + 
unittest.main() diff --git a/plaso/output/elastic.py b/plaso/output/elastic.py new file mode 100644 index 0000000..554e721 --- /dev/null +++ b/plaso/output/elastic.py @@ -0,0 +1,235 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An output module that saves data into an ElasticSearch database.""" + +import logging +import requests +import sys +import uuid + +import pyelasticsearch + +from plaso.formatters import manager as formatters_manager +from plaso.lib import output +from plaso.lib import timelib +from plaso.output import helper + + +class Elastic(output.LogOutputFormatter): + """Saves the events into an ElasticSearch database.""" + + # Add configuration data for this output module. + ARGUMENTS = [ + ('--case_name', { + 'dest': 'case_name', + 'type': unicode, + 'help': 'Add a case name. This will be the name of the index in ' + 'ElasticSearch.', + 'action': 'store', + 'default': ''}), + ('--document_type', { + 'dest': 'document_type', + 'type': unicode, + 'help': 'Name of the document type. This is the name of the document ' + 'type that will be used in ElasticSearch.', + 'action': 'store', + 'default': ''}), + ('--elastic_server_ip', { + 'dest': 'elastic_server', + 'type': unicode, + 'help': ( + 'If the ElasticSearch database resides on a different server ' + 'than localhost this parameter needs to be passed in. This ' + 'should be the IP address or the hostname of the server.'), + 'action': 'store', + 'default': '127.0.0.1'}), + ('--elastic_port', { + 'dest': 'elastic_port', + 'type': int, + 'help': ( + 'By default ElasticSearch uses the port number 9200, if the ' + 'database is listening on a different port this parameter ' + 'can be defined.'), + 'action': 'store', + 'default': 9200})] + + def __init__( + self, store, filehandle=sys.stdout, config=None, filter_use=None): + """Initializes the Elastic output module.""" + super(Elastic, self).__init__(store, filehandle, config, filter_use) + self._counter = 0 + self._data = [] + # TODO: move this to an output module interface. + self._formatters_manager = formatters_manager.EventFormatterManager + + elastic_host = getattr(config, 'elastic_server', '127.0.0.1') + elastic_port = getattr(config, 'elastic_port', 9200) + self._elastic_db = pyelasticsearch.ElasticSearch( + u'http://{0:s}:{1:d}'.format(elastic_host, elastic_port)) + + case_name = getattr(config, 'case_name', u'') + document_type = getattr(config, 'document_type', u'') + + # case_name becomes the index name in Elastic. + if case_name: + self._index_name = case_name.lower() + else: + self._index_name = uuid.uuid4().hex + + # Name of the doc_type that holds the plaso events. + if document_type: + self._doc_type = document_type.lower() + else: + self._doc_type = u'event' + + # Build up a list of available hostnames in this storage file. 
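+    # (These mappings start empty here and are populated in Start() when a
+    # storage object is available.)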
+    self._hostnames = {}
+    self._preprocesses = {}
+
+  def _EventToDict(self, event_object):
+    """Returns a dict built from an EventObject."""
+    ret_dict = event_object.GetValues()
+
+    # Get rid of a few attributes that cause issues (and need correcting).
+    if 'pathspec' in ret_dict:
+      del ret_dict['pathspec']
+
+    #if 'tag' in ret_dict:
+    #  del ret_dict['tag']
+    #  tag = getattr(event_object, 'tag', None)
+    #  if tag:
+    #    tags = tag.tags
+    #    ret_dict['tag'] = tags
+    #    if getattr(tag, 'comment', ''):
+    #      ret_dict['comment'] = tag.comment
+    ret_dict['tag'] = []
+
+    # To not overload the index, remove the regvalue index.
+    if 'regvalue' in ret_dict:
+      del ret_dict['regvalue']
+
+    # Add attributes that are calculated/derived.
+    # We want to remove millisecond precision (it causes some issues in
+    # conversion).
+    ret_dict['datetime'] = timelib.Timestamp.CopyToIsoFormat(
+        timelib.Timestamp.RoundToSeconds(event_object.timestamp),
+        timezone=self.zone)
+    msg, _ = self._formatters_manager.GetMessageStrings(event_object)
+    ret_dict['message'] = msg
+
+    source_type, source = self._formatters_manager.GetSourceStrings(
+        event_object)
+
+    ret_dict['source_short'] = source_type
+    ret_dict['source_long'] = source
+
+    hostname = getattr(event_object, 'hostname', '')
+    if self.store and not hostname:
+      hostname = self._hostnames.get(event_object.store_number, '-')
+
+    ret_dict['hostname'] = hostname
+
+    # TODO: move this into a base output class.
+    username = getattr(event_object, 'username', '-')
+    if self.store:
+      pre_obj = self._preprocesses.get(event_object.store_number)
+      if pre_obj:
+        check_user = pre_obj.GetUsernameById(username)
+
+        if check_user != '-':
+          username = check_user
+
+    if username == '-' and hasattr(event_object, 'user_sid'):
+      username = getattr(event_object, 'user_sid', '-')
+
+    ret_dict['username'] = username
+
+    return ret_dict
+
+  def EventBody(self, event_object):
+    """Adds an event object to the output buffer.
+
+    The buffered events are bulk indexed into the ElasticSearch database
+    every 5000 events.
+
+    Args:
+      event_object: The event object (instance of EventObject).
+    """
+    self._data.append(self._EventToDict(event_object))
+    self._counter += 1
+
+    # Check if we need to flush.
+    if self._counter % 5000 == 0:
+      self._elastic_db.bulk_index(self._index_name, self._doc_type, self._data)
+      self._data = []
+      sys.stdout.write('.')
+      sys.stdout.flush()
+
+  def Start(self):
+    """Create the necessary mapping."""
+    if self.store:
+      self._hostnames = helper.BuildHostDict(self.store)
+      for info in self.store.GetStorageInformation():
+        if hasattr(info, 'store_range'):
+          for store_number in range(info.store_range[0], info.store_range[1]):
+            self._preprocesses[store_number] = info
+
+    mapping = {
+        self._doc_type: {
+            u'_timestamp': {
+                u'enabled': True,
+                u'path': 'datetime',
+                u'format': 'date_time_no_millis'},
+        }
+    }
+    # Check if the mappings exist (only create if not there).
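+    # (Assumption based on the handling below: get_mapping() raises
+    # ElasticHttpNotFoundError when the index does not exist yet, in which
+    # case the index is created with the mapping attached.)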
+    try:
+      old_mapping_index = self._elastic_db.get_mapping(self._index_name)
+      old_mapping = old_mapping_index.get(self._index_name, {})
+      if self._doc_type not in old_mapping:
+        self._elastic_db.put_mapping(
+            self._index_name, self._doc_type, mapping=mapping)
+    except (pyelasticsearch.ElasticHttpNotFoundError,
+            pyelasticsearch.exceptions.ElasticHttpError):
+      try:
+        self._elastic_db.create_index(self._index_name, settings={
+            'mappings': mapping})
+      except pyelasticsearch.IndexAlreadyExistsError:
+        raise RuntimeError(u'Unable to create the index')
+    except requests.exceptions.ConnectionError as exception:
+      logging.error(
+          u'Unable to proceed, cannot connect to ElasticSearch backend '
+          u'with error: {0:s}.\nPlease verify connection.'.format(exception))
+      raise RuntimeError(u'Unable to connect to ElasticSearch backend.')
+
+    # pylint: disable=unexpected-keyword-arg
+    self._elastic_db.health(wait_for_status='yellow')
+
+    sys.stdout.write('Inserting data')
+    sys.stdout.flush()
+
+  def End(self):
+    """Flush the remaining buffered events one last time."""
+    self._elastic_db.bulk_index(self._index_name, self._doc_type, self._data)
+    self._data = []
+    sys.stdout.write('. [DONE]\n')
+    sys.stdout.write('ElasticSearch index name: {0:s}\n'.format(
+        self._index_name))
+    sys.stdout.flush()
diff --git a/plaso/output/helper.py b/plaso/output/helper.py
new file mode 100644
index 0000000..73af432
--- /dev/null
+++ b/plaso/output/helper.py
@@ -0,0 +1,109 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains helper functions for output modules."""
+
+from plaso.lib import eventdata
+
+
+def GetLegacy(evt):
+  """Returns a legacy MACB representation (e.g. 'M...') of the event."""
+  # TODO: Fix this function when the MFT parser has been implemented.
+  # The filestat parser is somewhat limited.
+  # Also fix this when duplicate entries have been implemented so that
+  # the function actually returns more than a single entry (as in combined).
+  if evt.data_type.startswith('fs:'):
+    letter = evt.timestamp_desc[0]
+
+    if letter == 'm':
+      return 'M...'
+    elif letter == 'a':
+      return '.A..'
+    elif letter == 'c':
+      if evt.timestamp_desc[1] == 'r':
+        return '...B'
+
+      return '..C.'
+    else:
+      return '....'
+
+  # Access time.
+  if evt.timestamp_desc in [
+      eventdata.EventTimestamp.ACCESS_TIME,
+      eventdata.EventTimestamp.ACCOUNT_CREATED,
+      eventdata.EventTimestamp.PAGE_VISITED,
+      eventdata.EventTimestamp.LAST_VISITED_TIME,
+      eventdata.EventTimestamp.START_TIME,
+      eventdata.EventTimestamp.LAST_SHUTDOWN,
+      eventdata.EventTimestamp.LAST_LOGIN_TIME,
+      eventdata.EventTimestamp.LAST_PASSWORD_RESET,
+      eventdata.EventTimestamp.LAST_CONNECTED,
+      eventdata.EventTimestamp.LAST_RUNTIME,
+      eventdata.EventTimestamp.LAST_PRINTED]:
+    return '.A..'
+
+  # Content modification.
+  if evt.timestamp_desc in [
+      eventdata.EventTimestamp.MODIFICATION_TIME,
+      eventdata.EventTimestamp.WRITTEN_TIME,
+      eventdata.EventTimestamp.DELETED_TIME]:
+    return 'M...'
+
+  # Content creation time.
+  if evt.timestamp_desc in [
+      eventdata.EventTimestamp.CREATION_TIME,
+      eventdata.EventTimestamp.ADDED_TIME,
+      eventdata.EventTimestamp.FILE_DOWNLOADED,
+      eventdata.EventTimestamp.FIRST_CONNECTED]:
+    return '...B'
+
+  # Metadata modification.
+  if evt.timestamp_desc in [
+      eventdata.EventTimestamp.CHANGE_TIME,
+      eventdata.EventTimestamp.ENTRY_MODIFICATION_TIME]:
+    return '..C.'
+
+  return '....'
+
+
+def BuildHostDict(storage_object):
+  """Return a dict object from a StorageFile object.
+
+  Build a dict object based on the preprocess objects stored inside
+  a storage file.
+
+  Args:
+    storage_object: The StorageFile object that stores all the EventObjects.
+
+  Returns:
+    A dict object that has the store number as a key and the hostname
+    as the value to that key.
+  """
+  host_dict = {}
+  if not storage_object:
+    return host_dict
+
+  if not hasattr(storage_object, 'GetStorageInformation'):
+    return host_dict
+
+  for info in storage_object.GetStorageInformation():
+    if hasattr(info, 'store_range') and hasattr(info, 'hostname'):
+      for store_number in range(info.store_range[0], info.store_range[1] + 1):
+        # TODO: A bit wasteful, if the range is large we are wasting keys.
+        # Rewrite this logic into a more optimal one.
+        host_dict[store_number] = info.hostname
+
+  return host_dict
diff --git a/plaso/output/json_out.py b/plaso/output/json_out.py
new file mode 100644
index 0000000..daa11e8
--- /dev/null
+++ b/plaso/output/json_out.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""An output module that saves data into a simple JSON format."""
+
+from plaso.lib import output
+from plaso.serializer import json_serializer
+
+
+class Json(output.FileLogOutputFormatter):
+  """Saves the events into a JSON format."""
+
+  def EventBody(self, event_object):
+    """Writes the serialized event object to the filehandle.
+
+    Each event object is written as a single line of JSON, produced by
+    the JSON event object serializer.
+
+    Args:
+      event_object: The event object (instance of EventObject).
+    """
+    self.filehandle.WriteLine(
+        json_serializer.JsonEventObjectSerializer.WriteSerialized(event_object))
+    self.filehandle.WriteLine(u'\n')
diff --git a/plaso/output/json_out_test.py b/plaso/output/json_out_test.py
new file mode 100644
index 0000000..b82bf4b
--- /dev/null
+++ b/plaso/output/json_out_test.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the JSON output class.""" + +import StringIO +import unittest + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory + +from plaso.lib import event +from plaso.lib import timelib_test +from plaso.output import json_out + + +class JsonTestEvent(event.EventObject): + """Simplified EventObject for testing.""" + DATA_TYPE = 'test:l2tjson' + + def __init__(self): + """Initialize event with data.""" + super(JsonTestEvent, self).__init__() + self.timestamp = timelib_test.CopyStringToTimestamp( + '2012-06-27 18:17:01+00:00') + self.hostname = u'ubuntu' + self.display_name = u'OS: /var/log/syslog.1' + self.inode = 12345678 + self.text = ( + u'Reporter PID: |8442| (pam_unix(cron:session): session\n ' + u'closed for user root)') + self.username = u'root' + + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=u'/cases/image.dd') + self.pathspec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_TSK, inode=15, location=u'/var/log/syslog.1', + parent=os_path_spec) + + +class JsonOutputTest(unittest.TestCase): + """Tests for the JSON outputter.""" + + def setUp(self): + """Sets up the objects needed for this test.""" + self.output = StringIO.StringIO() + self.formatter = json_out.Json(None, self.output) + self.event_object = JsonTestEvent() + + def testStartAndEnd(self): + """Test to ensure start and end functions do not add text.""" + self.formatter.Start() + self.assertEquals(self.output.getvalue(), u'') + self.formatter.End() + self.assertEquals(self.output.getvalue(), u'') + + def testEventBody(self): + """Test ensures that returned lines returned are formatted as JSON.""" + + expected_string = ( + '{{"username": "root", "display_name": "OS: /var/log/syslog.1", ' + '"uuid": "{0:s}", "data_type": "test:l2tjson", ' + '"timestamp": 1340821021000000, "hostname": "ubuntu", "text": ' + '"Reporter PID: |8442| (pam_unix(cron:session): session\\n ' + 'closed for user root)", "pathspec": "{{\\"type_indicator\\": ' + '\\"TSK\\", \\"inode\\": 15, \\"location\\": \\"/var/log/syslog.1\\", ' + '\\"parent\\": \\"{{\\\\\\"type_indicator\\\\\\": \\\\\\"OS\\\\\\", ' + '\\\\\\"location\\\\\\": \\\\\\"/cases/image.dd\\\\\\"}}\\"}}", ' + '"inode": 12345678}}\n').format(self.event_object.uuid) + + self.formatter.EventBody(self.event_object) + self.assertEquals(self.output.getvalue(), expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/output/l2t_csv.py b/plaso/output/l2t_csv.py new file mode 100644 index 0000000..5278703 --- /dev/null +++ b/plaso/output/l2t_csv.py @@ -0,0 +1,144 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains functions for outputting as l2t_csv.
+
+Author description at: http://code.google.com/p/log2timeline/wiki/l2t_csv
+"""
+
+import logging
+import re
+
+from plaso.formatters import manager as formatters_manager
+from plaso.lib import errors
+from plaso.lib import output
+from plaso.lib import timelib
+from plaso.lib import utils
+from plaso.output import helper
+
+
+class L2tcsv(output.FileLogOutputFormatter):
+  """CSV format used by log2timeline, with 17 fixed fields."""
+
+  FORMAT_ATTRIBUTE_RE = re.compile('{([^}]+)}')
+
+  def Start(self):
+    """Writes the header line to the output."""
+    # Build a hostname and username dict objects.
+    self._hostnames = {}
+    if self.store:
+      self._hostnames = helper.BuildHostDict(self.store)
+      self._preprocesses = {}
+      for info in self.store.GetStorageInformation():
+        if hasattr(info, 'store_range'):
+          for store_number in range(
+              info.store_range[0], info.store_range[1] + 1):
+            self._preprocesses[store_number] = info
+
+    self.filehandle.WriteLine(
+        u'date,time,timezone,MACB,source,sourcetype,type,user,host,short,desc,'
+        u'version,filename,inode,notes,format,extra\n')
+
+  def WriteEvent(self, event_object):
+    """Write a single event."""
+    try:
+      self.EventBody(event_object)
+    except errors.NoFormatterFound:
+      logging.error(u'Unable to output line, no formatter found.')
+      logging.error(event_object)
+
+  def EventBody(self, event_object):
+    """Formats data as l2t_csv and writes to the filehandle from
+    OutputFormatter.
+
+    Args:
+      event_object: The event object (EventObject).
+
+    Raises:
+      errors.NoFormatterFound: If no formatter for that event is found.
+    """
+    if not hasattr(event_object, 'timestamp'):
+      return
+
+    # TODO: move this to an output module interface.
+    event_formatter = formatters_manager.EventFormatterManager.GetFormatter(
+        event_object)
+    if not event_formatter:
+      raise errors.NoFormatterFound(
+          u'Unable to find event formatter for: {0:s}.'.format(
+              event_object.DATA_TYPE))
+
+    msg, msg_short = event_formatter.GetMessages(event_object)
+    source_short, source_long = event_formatter.GetSources(event_object)
+
+    date_use = timelib.Timestamp.CopyToDatetime(
+        event_object.timestamp, self.zone)
+    extras = []
+    format_variables = self.FORMAT_ATTRIBUTE_RE.findall(
+        event_formatter.format_string)
+    for key in event_object.GetAttributes():
+      if key in utils.RESERVED_VARIABLES or key in format_variables:
+        continue
+      # Force a string conversion since some of the extra attributes
+      # can be numbers or bools.
+      value = getattr(event_object, key)
+      extras.append(u'{0:s}: {1!s} '.format(key, value))
+    extra = ' '.join(extras)
+
+    inode = getattr(event_object, 'inode', '-')
+    if inode == '-':
+      if hasattr(event_object, 'pathspec') and hasattr(
+          event_object.pathspec, 'image_inode'):
+        inode = event_object.pathspec.image_inode
+
+    hostname = getattr(event_object, 'hostname', u'')
+
+    # TODO: move this into a base output class.
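+    # The lookups below are keyed on the event's store number: a missing
+    # hostname is filled in from the dict built by helper.BuildHostDict()
+    # in Start(), and the username is mapped back to an account name via
+    # the matching preprocessing object, when such a mapping exists.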
+ username = getattr(event_object, 'username', u'-') + if self.store: + if not hostname: + hostname = self._hostnames.get(event_object.store_number, u'-') + + pre_obj = self._preprocesses.get(event_object.store_number) + if pre_obj: + check_user = pre_obj.GetUsernameById(username) + if check_user != '-': + username = check_user + + row = ( + '{0:02d}/{1:02d}/{2:04d}'.format( + date_use.month, date_use.day, date_use.year), + '{0:02d}:{1:02d}:{2:02d}'.format( + date_use.hour, date_use.minute, date_use.second), + self.zone, + helper.GetLegacy(event_object), + source_short, + source_long, + getattr(event_object, 'timestamp_desc', u'-'), + username, + hostname, + msg_short, + msg, + '2', + getattr(event_object, 'display_name', u'-'), + inode, + getattr(event_object, 'notes', u'-'), # Notes field placeholder. + getattr(event_object, 'parser', u'-'), + extra.replace('\n', u'-').replace('\r', u'')) + + out_write = u'{0:s}\n'.format( + u','.join(unicode(x).replace(',', u' ') for x in row)) + self.filehandle.WriteLine(out_write) diff --git a/plaso/output/l2t_csv_test.py b/plaso/output/l2t_csv_test.py new file mode 100644 index 0000000..8cb1cfe --- /dev/null +++ b/plaso/output/l2t_csv_test.py @@ -0,0 +1,95 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
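As an aside, a minimal sketch (not part of plaso) of consuming the 17-column l2t_csv output defined above; the column names come from the header line written by L2tcsv.Start(), and 'timeline.csv' is a hypothetical input file:

import csv

# Count events per 'source' column in an l2t_csv timeline; values had any
# commas replaced by spaces on output, so the plain csv module suffices.
with open('timeline.csv', 'rb') as file_object:
  per_source = {}
  for row in csv.DictReader(file_object):
    per_source[row['source']] = per_source.get(row['source'], 0) + 1

for source, count in sorted(per_source.items()):
  print '{0:s}: {1:d}'.format(source, count)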
+"""Tests for the L2tCsv output class.""" + +import StringIO +import unittest + +from plaso.formatters import interface as formatters_interface +from plaso.lib import event +from plaso.lib import eventdata +from plaso.output import l2t_csv + + +class L2tTestEvent(event.EventObject): + """Simplified EventObject for testing.""" + DATA_TYPE = 'test:l2t_csv' + + def __init__(self): + """Initialize event with data.""" + super(L2tTestEvent, self).__init__() + self.timestamp = 1340821021000000 + self.timestamp_desc = eventdata.EventTimestamp.WRITTEN_TIME + self.hostname = u'ubuntu' + self.filename = u'log/syslog.1' + self.display_name = u'log/syslog.1' + self.some_additional_foo = True + self.my_number = 123 + self.text = ( + u'Reporter PID: 8442 (pam_unix(cron:session): session\n ' + u'closed for user root)') + + +class L2tTestEventFormatter(formatters_interface.EventFormatter): + """Formatter for the test event.""" + DATA_TYPE = 'test:l2t_csv' + FORMAT_STRING = u'{text}' + + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'Syslog' + + +class L2tCsvTest(unittest.TestCase): + """Contains tests to validate the L2tCSV outputter.""" + def setUp(self): + self.output = StringIO.StringIO() + self.formatter = l2t_csv.L2tcsv(None, self.output) + self.event_object = L2tTestEvent() + + def testStart(self): + """Test ensures header line is outputted as expected.""" + + correct_line = ( + u'date,time,timezone,MACB,source,sourcetype,type,user,host,short,desc,' + u'version,filename,inode,notes,format,extra\n') + + self.formatter.Start() + self.assertEquals(self.output.getvalue(), correct_line) + + def testEventBody(self): + """Test ensures that returned lines returned are formatted as L2tCSV.""" + + self.formatter.EventBody(self.event_object) + correct = ( + u'06/27/2012,18:17:01,UTC,M...,LOG,Syslog,Content Modification Time,-,' + u'ubuntu,Reporter PID: 8442 (pam_unix(cron:session): session ' + u'closed for user root),Reporter PID: 8442 ' + u'(pam_unix(cron:session): ' + u'session closed for user root),2,log/syslog.1,-,-,-,my_number: 123 ' + u'some_additional_foo: True \n') + self.assertEquals(self.output.getvalue(), correct) + + def testEventBodyNoExtraCommas(self): + """Test ensures that the only commas returned are the 16 delimeters.""" + + self.formatter.EventBody(self.event_object) + self.assertEquals(self.output.getvalue().count(u','), 16) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/output/l2t_tln.py b/plaso/output/l2t_tln.py new file mode 100644 index 0000000..5246d18 --- /dev/null +++ b/plaso/output/l2t_tln.py @@ -0,0 +1,122 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains a class for outputting in the l2tTLN format. + +l2tTLN is TLN as expanded in L2T 0.65 to 7 fields: + +https://code.google.com/p/log2timeline/source/browse/lib/Log2t/output/tln.pm + +Fields: + Time - 32 bit Unix epoch. 
+  Source - The plugin that produced the data.
+  Host - The source host system.
+  User - The user associated with the data.
+  Description - Message string describing the data.
+  TZ - L2T 0.65 field. Timezone of the event.
+  Notes - L2T 0.65 field. Optional notes field or filename and inode.
+
+For example:
+  1340821021|LOG|ubuntu|root|Session closed for user root|UTC|-
+"""
+
+import logging
+
+from plaso.formatters import manager as formatters_manager
+from plaso.lib import errors
+from plaso.lib import output
+from plaso.lib import timelib
+from plaso.output import helper
+
+
+class L2ttln(output.FileLogOutputFormatter):
+  """Extended seven field pipe delimited TLN; L2T 0.65 style."""
+
+  DELIMITER = u'|'
+
+  def Start(self):
+    """Writes the header line to the output."""
+    # Build a hostname and username dict objects.
+    self._hostnames = {}
+    if self.store:
+      self._hostnames = helper.BuildHostDict(self.store)
+      self._preprocesses = {}
+      for info in self.store.GetStorageInformation():
+        if hasattr(info, 'store_range'):
+          for store_number in range(
+              info.store_range[0], info.store_range[1] + 1):
+            self._preprocesses[store_number] = info
+    self.filehandle.WriteLine(u'Time|Source|Host|User|Description|TZ|Notes\n')
+
+  def WriteEvent(self, event_object):
+    """Write a single event."""
+    try:
+      self.EventBody(event_object)
+    except errors.NoFormatterFound:
+      logging.error(u'Unable to output line, no formatter found.')
+      logging.error(event_object.GetString())
+
+  def EventBody(self, event_object):
+    """Formats data as TLN and writes to the filehandle from OutputFormatter.
+
+    Args:
+      event_object: The event object (EventObject).
+
+    Raises:
+      errors.NoFormatterFound: If no formatter for that event is found.
+    """
+    if not hasattr(event_object, 'timestamp'):
+      return
+
+    # TODO: move this to an output module interface.
+    event_formatter = formatters_manager.EventFormatterManager.GetFormatter(
+        event_object)
+    if not event_formatter:
+      raise errors.NoFormatterFound(
+          u'Unable to find event formatter for: {0:s}.'.format(
+              event_object.DATA_TYPE))
+
+    msg, _ = event_formatter.GetMessages(event_object)
+    source_short, _ = event_formatter.GetSources(event_object)
+
+    date_use = timelib.Timestamp.CopyToPosix(event_object.timestamp)
+    hostname = getattr(event_object, 'hostname', u'')
+    username = getattr(event_object, 'username', u'')
+
+    if self.store:
+      if not hostname:
+        hostname = self._hostnames.get(event_object.store_number, '')
+
+      pre_obj = self._preprocesses.get(event_object.store_number)
+      if pre_obj:
+        check_user = pre_obj.GetUsernameById(username)
+        if check_user != '-':
+          username = check_user
+
+    notes = getattr(event_object, 'notes', u'')
+    if not notes:
+      notes = u'File: {0:s} inode: {1!s}'.format(
+          getattr(event_object, 'display_name', u''),
+          getattr(event_object, 'inode', u''))
+
+    out_write = u'{0!s}|{1:s}|{2:s}|{3:s}|{4:s}|{5:s}|{6!s}\n'.format(
+        date_use,
+        source_short.replace(self.DELIMITER, u' '),
+        hostname.replace(self.DELIMITER, u' '),
+        username.replace(self.DELIMITER, u' '),
+        msg.replace(self.DELIMITER, u' '),
+        self.zone,
+        notes.replace(self.DELIMITER, u' '))
+
+    self.filehandle.WriteLine(out_write)
diff --git a/plaso/output/l2t_tln_test.py b/plaso/output/l2t_tln_test.py
new file mode 100644
index 0000000..5b77ed4
--- /dev/null
+++ b/plaso/output/l2t_tln_test.py
@@ -0,0 +1,86 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the l2tTLN output class."""
+
+import StringIO
+import unittest
+
+from plaso.formatters import interface as formatters_interface
+from plaso.lib import event
+from plaso.output import l2t_tln
+
+
+class TlnTestEvent(event.EventObject):
+  """Simplified EventObject for testing."""
+  DATA_TYPE = 'test:tln'
+
+  def __init__(self):
+    """Initialize event with data."""
+    super(TlnTestEvent, self).__init__()
+    self.timestamp = 1340821021000000
+    self.hostname = u'ubuntu'
+    self.display_name = u'OS: log/syslog.1'
+    self.inode = 12345678
+    self.text = (
+        u'Reporter PID: |8442| (pam_unix(cron:session): session\n '
+        u'closed for user root)')
+    self.username = u'root'
+
+
+class TlnTestEventFormatter(formatters_interface.EventFormatter):
+  """Formatter for the test event."""
+  DATA_TYPE = 'test:tln'
+  FORMAT_STRING = u'{text}'
+  SOURCE_SHORT = 'LOG'
+  SOURCE_LONG = 'Syslog'
+
+
+class L2TTlnTest(unittest.TestCase):
+  """Tests for the l2tTLN outputter."""
+
+  def setUp(self):
+    """Sets up the objects needed for this test."""
+    self.output = StringIO.StringIO()
+    self.formatter = l2t_tln.L2ttln(None, self.output)
+    self.event_object = TlnTestEvent()
+
+  def testStart(self):
+    """Test ensures the header line is written as expected."""
+    correct_line = u'Time|Source|Host|User|Description|TZ|Notes\n'
+
+    self.formatter.Start()
+    self.assertEquals(self.output.getvalue(), correct_line)
+
+  def testEventBody(self):
+    """Test ensures that the returned lines are formatted as l2tTLN."""
+
+    self.formatter.EventBody(self.event_object)
+    correct = (u'1340821021|LOG|ubuntu|root|Reporter PID: 8442 '
+               u'(pam_unix(cron:session): session closed for user root)|UTC'
+               u'|File: OS: log/syslog.1 inode: 12345678\n')
+    self.assertEquals(self.output.getvalue(), correct)
+
+  def testEventBodyNoStrayPipes(self):
+    """Test ensures that the only pipes are the six field delimiters."""
+
+    self.formatter.EventBody(self.event_object)
+    self.assertEquals(self.output.getvalue().count(u'|'), 6)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/output/mysql_4n6.py b/plaso/output/mysql_4n6.py
new file mode 100644
index 0000000..b130ddb
--- /dev/null
+++ b/plaso/output/mysql_4n6.py
@@ -0,0 +1,402 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""An output module that saves the data into a 4n6time MySQL database."""
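As orientation for the schema this module creates, a sketch (not shipped with plaso) of inspecting the resulting database; the connection values are the module's own --db_host/--db_user/--db_pass/--db_name defaults from ARGUMENTS below:

import MySQLdb

# Count events per source in the log2timeline table that Mysql4n6 populates.
connection = MySQLdb.connect('localhost', 'root', 'forensic', 'log2timeline')
cursor = connection.cursor()
cursor.execute('SELECT source, COUNT(*) FROM log2timeline GROUP BY source')
for source, count in cursor.fetchall():
  print '{0:s}: {1:d}'.format(source, count)
connection.close()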
+ +import logging +import re +import sys + +import MySQLdb + +from plaso import formatters +from plaso.formatters import interface as formatters_interface +from plaso.formatters import manager as formatters_manager +from plaso.lib import errors +from plaso.lib import output +from plaso.lib import timelib +from plaso.lib import utils +from plaso.output import helper + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class Mysql4n6(output.LogOutputFormatter): + """Contains functions for outputting as 4n6time MySQL database.""" + + FORMAT_ATTRIBUTE_RE = re.compile('{([^}]+)}') + + META_FIELDS = ['sourcetype', 'source', 'user', 'host', 'MACB', + 'color', 'type', 'record_number'] + + ARGUMENTS = [ + ('--db_user', { + 'dest': 'db_user', + 'type': unicode, + 'help': 'Defines the database user.', + 'metavar': 'USERNAME', + 'action': 'store', + 'default': 'root'}), + ('--db_host', { + 'dest': 'db_host', + 'metavar': 'HOSTNAME', + 'type': unicode, + 'help': ( + 'Defines the IP address or the hostname of the database ' + 'server.'), + 'action': 'store', + 'default': 'localhost'}), + ('--db_pass', { + 'dest': 'db_pass', + 'metavar': 'PASSWORD', + 'type': unicode, + 'help': 'The password for the database user.', + 'action': 'store', + 'default': 'forensic'}), + ('--db_name', { + 'dest': 'db_name', + 'type': unicode, + 'help': 'The name of the database to connect to.', + 'action': 'store', + 'default': 'log2timeline'}), + ('--append', { + 'dest': 'append', + 'action': 'store_true', + 'help': ( + 'Defines whether the intention is to append to an already ' + 'existing database or overwrite it. Defaults to overwrite.'), + 'default': False}), + ('--fields', { + 'dest': 'fields', + 'action': 'store', + 'type': unicode, + 'nargs': '*', + 'help': 'Defines which fields should be indexed in the database.', + 'default': [ + 'host', 'user', 'source', 'sourcetype', 'type', 'datetime', + 'color']}), + ('--evidence', { + 'dest': 'evidence', + 'action': 'store', + 'help': ( + 'Set the evidence field to a specific value, defaults to ' + 'empty.'), + 'type': unicode, + 'default': '-'})] + + def __init__(self, store, filehandle=sys.stdout, config=None, + filter_use=None): + """Constructor for the output module. + + Args: + store: The storage object. + filehandle: A file-like object that can be written to. + config: The configuration object for the module. + filter_use: The filter object used. + """ + # TODO: Add a unit test for this output module. + super(Mysql4n6, self).__init__(store, filehandle, config, filter_use) + # TODO: move this to an output module interface. 
+ self._formatters_manager = formatters_manager.EventFormatterManager + + self.set_status = getattr(config, 'set_status', None) + + self.host = getattr(config, 'db_host', 'localhost') + self.user = getattr(config, 'db_user', 'root') + self.password = getattr(config, 'db_pass', 'forensic') + self.dbname = getattr(config, 'db_name', 'log2timeline') + self.evidence = getattr(config, 'evidence', '-') + self.append = getattr(config, 'append', False) + self.fields = getattr(config, 'fields', [ + 'host', 'user', 'source', 'sourcetype', 'type', 'datetime', 'color']) + + def Start(self): + """Connect to the database and create the table before inserting.""" + if self.dbname == '': + raise IOError(u'Specify a database name.') + + try: + if self.append: + self.conn = MySQLdb.connect(self.host, self.user, + self.password, self.dbname) + self.curs = self.conn.cursor() + else: + self.conn = MySQLdb.connect(self.host, self.user, self.password) + self.curs = self.conn.cursor() + + self.conn.set_character_set(u'utf8') + self.curs.execute(u'SET NAMES utf8') + self.curs.execute(u'SET CHARACTER SET utf8') + self.curs.execute(u'SET character_set_connection=utf8') + self.curs.execute(u'SET GLOBAL innodb_large_prefix=ON') + self.curs.execute(u'SET GLOBAL innodb_file_format=barracuda') + self.curs.execute(u'SET GLOBAL innodb_file_per_table=ON') + self.curs.execute( + u'CREATE DATABASE IF NOT EXISTS {0:s}'.format(self.dbname)) + self.curs.execute(u'USE {0:s}'.format(self.dbname)) + # Create tables. + self.curs.execute( + (u'CREATE TABLE IF NOT EXISTS log2timeline (' + u'rowid INT NOT NULL AUTO_INCREMENT, timezone VARCHAR(256), ' + u'MACB VARCHAR(256), source VARCHAR(256), sourcetype VARCHAR(256), ' + u'type VARCHAR(256), user VARCHAR(256), host VARCHAR(256), ' + u'description TEXT, filename VARCHAR(256), inode VARCHAR(256), ' + u'notes VARCHAR(256), format VARCHAR(256), ' + u'extra TEXT, datetime datetime, reportnotes VARCHAR(256), ' + u'inreport VARCHAR(256), tag VARCHAR(256), color VARCHAR(256), ' + u'offset INT, store_number INT, store_index INT, ' + u'vss_store_number INT, URL TEXT, ' + u'record_number VARCHAR(256), event_identifier VARCHAR(256), ' + u'event_type VARCHAR(256), source_name VARCHAR(256), ' + u'user_sid VARCHAR(256), computer_name VARCHAR(256), ' + u'evidence VARCHAR(256), ' + u'PRIMARY KEY (rowid)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED')) + if self.set_status: + self.set_status(u'Created table: log2timeline') + + for field in self.META_FIELDS: + self.curs.execute( + u'CREATE TABLE IF NOT EXISTS l2t_{0}s ({0}s TEXT, frequency INT) ' + u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED'.format(field)) + if self.set_status: + self.set_status(u'Created table: l2t_{0:s}'.format(field)) + + self.curs.execute( + u'CREATE TABLE IF NOT EXISTS l2t_tags (tag TEXT) ' + u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED') + if self.set_status: + self.set_status(u'Created table: l2t_tags') + + self.curs.execute( + u'CREATE TABLE IF NOT EXISTS l2t_saved_query (' + u'name TEXT, query TEXT) ' + u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED') + if self.set_status: + self.set_status(u'Created table: l2t_saved_query') + + self.curs.execute( + u'CREATE TABLE IF NOT EXISTS l2t_disk (' + u'disk_type INT, mount_path TEXT, ' + u'dd_path TEXT, dd_offset TEXT, ' + u'storage_file TEXT, export_path TEXT) ' + u'ENGINE=InnoDB ROW_FORMAT=COMPRESSED') + self.curs.execute( + u'INSERT INTO l2t_disk (' + u'disk_type, mount_path, dd_path, ' + u'dd_offset, storage_file, ' + u'export_path) VALUES ' + u'(0, "", "", "", "", "")') + if self.set_status: + 
self.set_status(u'Created table: l2t_disk')
+    except MySQLdb.Error as exception:
+      raise IOError(u'Unable to insert into database with error: {0:s}'.format(
+          exception))
+
+    self.count = 0
+
+  def End(self):
+    """Create indices and commit the transaction."""
+    # Build up indices for the fields specified in the args.
+    # It will commit the inserts automatically before creating an index.
+    if not self.append:
+      for field_name in self.fields:
+        sql = u'CREATE INDEX {0:s}_idx ON log2timeline ({0:s})'.format(
+            field_name)
+        self.curs.execute(sql)
+        if self.set_status:
+          self.set_status(u'Created index: {0:s}'.format(field_name))
+
+    # Get meta info and save into their tables.
+    if self.set_status:
+      self.set_status(u'Creating metadata...')
+
+    for field in self.META_FIELDS:
+      vals = self._GetDistinctValues(field)
+      self.curs.execute(u'DELETE FROM l2t_{0:s}s'.format(field))
+      for name, freq in vals.items():
+        self.curs.execute((
+            u'INSERT INTO l2t_{0:s}s ({1:s}s, frequency) '
+            u'VALUES("{2:s}", {3:d}) ').format(field, field, name, freq))
+
+    self.curs.execute(u'DELETE FROM l2t_tags')
+    for tag in self._ListTags():
+      self.curs.execute(
+          u'INSERT INTO l2t_tags (tag) VALUES ("{0:s}")'.format(tag))
+
+    if self.set_status:
+      self.set_status(u'Database created.')
+
+    self.conn.commit()
+    self.curs.close()
+    self.conn.close()
+
+  def _GetDistinctValues(self, field_name):
+    """Query database for unique field types."""
+    self.curs.execute(
+        u'SELECT {0}, COUNT({0}) FROM log2timeline GROUP BY {0}'.format(
+            field_name))
+    res = {}
+    for row in self.curs.fetchall():
+      if row[0] != '':
+        res[row[0]] = int(row[1])
+    return res
+
+  def _ListTags(self):
+    """Query database for unique tag types."""
+    all_tags = []
+    self.curs.execute(
+        u'SELECT DISTINCT tag FROM log2timeline')
+
+    # This cleans up the messy SQL return.
+    for tag_row in self.curs.fetchall():
+      tag_string = tag_row[0]
+      if tag_string:
+        tags = tag_string.split(',')
+        for tag in tags:
+          if tag not in all_tags:
+            all_tags.append(tag)
+    return all_tags
+
+  def EventBody(self, event_object):
+    """Formats data as 4n6time database table format and writes to the db.
+
+    Args:
+      event_object: The event object (EventObject).
+
+    Raises:
+      errors.NoFormatterFound: If no formatter for this event is found.
+ """ + if not hasattr(event_object, 'timestamp'): + return + + event_formatter = self._formatters_manager.GetFormatter(event_object) + if not event_formatter: + raise errors.NoFormatterFound( + u'Unable to output event, no event formatter found.') + + if (isinstance( + event_formatter, formatters.winreg.WinRegistryGenericFormatter) and + event_formatter.FORMAT_STRING.find('<|>') == -1): + event_formatter.FORMAT_STRING = u'[{keyname}]<|>{text}<|>' + + elif isinstance( + event_formatter, formatters_interface.ConditionalEventFormatter): + event_formatter.FORMAT_STRING_SEPARATOR = u'<|>' + + elif isinstance(event_formatter, formatters_interface.EventFormatter): + event_formatter.format_string = event_formatter.format_string.replace( + '}', '}<|>') + + msg, _ = event_formatter.GetMessages(event_object) + source_short, source_long = event_formatter.GetSources(event_object) + + date_use = timelib.Timestamp.CopyToDatetime( + event_object.timestamp, self.zone) + if not date_use: + logging.error(u'Unable to process date for entry: {0:s}'.format(msg)) + return + extra = [] + format_variables = self.FORMAT_ATTRIBUTE_RE.findall( + event_formatter.format_string) + for key in event_object.GetAttributes(): + if key in utils.RESERVED_VARIABLES or key in format_variables: + continue + extra.append(u'{0:s}: {1!s} '.format( + key, getattr(event_object, key, None))) + + extra = u' '.join(extra) + + inode = getattr(event_object, 'inode', '-') + if inode == '-': + if (hasattr(event_object, 'pathspec') and + hasattr(event_object.pathspec, 'image_inode')): + inode = event_object.pathspec.image_inode + + date_use_string = u'{0:d}-{1:d}-{2:d} {3:d}:{4:d}:{5:d}'.format( + date_use.year, date_use.month, date_use.day, date_use.hour, + date_use.minute, date_use.second) + + tags = [] + if hasattr(event_object, 'tag') and hasattr(event_object.tag, 'tags'): + tags = event_object.tag.tags + else: + tags = u'' + + taglist = u','.join(tags) + row = ( + str(self.zone), + helper.GetLegacy(event_object), + source_short, + source_long, + getattr(event_object, 'timestamp_desc', '-'), + getattr(event_object, 'username', '-'), + getattr(event_object, 'hostname', '-'), + msg, + getattr(event_object, 'filename', '-'), + inode, + getattr(event_object, 'notes', '-'), + getattr(event_object, 'parser', '-'), + extra, + date_use_string, + '', + '', + taglist, + '', + getattr(event_object, 'offset', 0), + event_object.store_number, + event_object.store_index, + self.GetVSSNumber(event_object), + getattr(event_object, 'url', '-'), + getattr(event_object, 'record_number', 0), + getattr(event_object, 'event_identifier', '-'), + getattr(event_object, 'event_type', '-'), + getattr(event_object, 'source_name', '-'), + getattr(event_object, 'user_sid', '-'), + getattr(event_object, 'computer_name', '-'), + self.evidence) + + try: + self.curs.execute( + 'INSERT INTO log2timeline(timezone, MACB, source, ' + 'sourcetype, type, user, host, description, filename, ' + 'inode, notes, format, extra, datetime, reportnotes, ' + 'inreport, tag, color, offset, store_number, ' + 'store_index, vss_store_number, URL, record_number, ' + 'event_identifier, event_type, source_name, user_sid, ' + 'computer_name, evidence) VALUES (' + '%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, ' + '%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, ' + '%s, %s, %s, %s)', row) + except MySQLdb.Error as exception: + logging.warning( + u'Unable to insert into database with error: {0:s}.'.format( + exception)) + + self.count += 1 + + # TODO: Experiment if committing the 
current transaction
+    # every 10000 inserts is the optimal approach.
+    if self.count % 10000 == 0:
+      self.conn.commit()
+      if self.set_status:
+        self.set_status(u'Inserting event: {0:d}'.format(self.count))
+
+  def GetVSSNumber(self, event_object):
+    """Return the vss_store_number of the event."""
+    if not hasattr(event_object, 'pathspec'):
+      return -1
+
+    return getattr(event_object.pathspec, 'vss_store_number', -1)
diff --git a/plaso/output/pstorage.py b/plaso/output/pstorage.py
new file mode 100644
index 0000000..2da8f1b
--- /dev/null
+++ b/plaso/output/pstorage.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implements a StorageFile output formatter."""
+
+from plaso.lib import event
+from plaso.lib import output
+from plaso.lib import storage
+from plaso.lib import timelib
+
+
+class Pstorage(output.LogOutputFormatter):
+  """Dumps event objects to a plaso storage file."""
+
+  def Start(self):
+    """Sets up the output storage file."""
+    pre_obj = event.PreprocessObject()
+    pre_obj.collection_information = {
+        'time_of_run': timelib.Timestamp.GetNow()}
+    if hasattr(self._config, 'filter') and self._config.filter:
+      pre_obj.collection_information['filter'] = self._config.filter
+    if hasattr(self._config, 'storagefile') and self._config.storagefile:
+      pre_obj.collection_information[
+          'file_processed'] = self._config.storagefile
+    self._storage = storage.StorageFile(self.filehandle, pre_obj=pre_obj)
+
+  def EventBody(self, event_object):
+    """Adds an event object to the storage file.
+
+    Args:
+      event_object: The event object (instance of EventObject).
+    """
+    # Needed due to duplicate removals, if two events
+    # are merged then we'll just pick the first inode value.
+    inode = getattr(event_object, 'inode', None)
+    if isinstance(inode, basestring):
+      inode_list = inode.split(';')
+      try:
+        new_inode = int(inode_list[0])
+      except (ValueError, IndexError):
+        new_inode = 0
+
+      event_object.inode = new_inode
+
+    self._storage.AddEventObject(event_object)
+
+  def End(self):
+    """Closes the storage file."""
+    self._storage.Close()
diff --git a/plaso/output/pstorage_test.py b/plaso/output/pstorage_test.py
new file mode 100644
index 0000000..dae4301
--- /dev/null
+++ b/plaso/output/pstorage_test.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for plaso.output.pstorage.""" + +import os +import shutil +import tempfile +import unittest + +from plaso.lib import output +from plaso.lib import pfilter +from plaso.lib import storage +from plaso.output import pstorage # pylint: disable=unused-import + + +class TempDirectory(object): + """A self cleaning temporary directory.""" + + def __init__(self): + """Initializes the temporary directory.""" + super(TempDirectory, self).__init__() + self.name = u'' + + def __enter__(self): + """Make this work with the 'with' statement.""" + self.name = tempfile.mkdtemp() + return self.name + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make this work with the 'with' statement.""" + shutil.rmtree(self.name, True) + + +class PstorageTest(unittest.TestCase): + def setUp(self): + self.test_filename = os.path.join('test_data', 'psort_test.out') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + self.maxDiff = None + pfilter.TimeRangeCache.ResetTimeConstraints() + + def testOutput(self): + with TempDirectory() as dirname: + dump_file = os.path.join(dirname, 'plaso.db') + # Copy events to pstorage dump. + with storage.StorageFile(self.test_filename, read_only=True) as store: + formatter_cls = output.GetOutputFormatter('Pstorage') + formatter = formatter_cls(store, dump_file) + with output.EventBuffer(formatter, check_dedups=False) as output_buffer: + event_object = formatter.FetchEntry() + while event_object: + output_buffer.Append(event_object) + event_object = formatter.FetchEntry() + + # Make sure original and dump have the same events. + original = storage.StorageFile(self.test_filename, read_only=True) + dump = storage.StorageFile(dump_file, read_only=True) + event_object_original = original.GetSortedEntry() + event_object_dump = dump.GetSortedEntry() + original_list = [] + dump_list = [] + + while event_object_original: + original_list.append(event_object_original.EqualityString()) + dump_list.append(event_object_dump.EqualityString()) + event_object_original = original.GetSortedEntry() + event_object_dump = dump.GetSortedEntry() + + self.assertFalse(event_object_dump) + + for original_str, dump_str in zip( + sorted(original_list), sorted(dump_list)): + self.assertEqual(original_str, dump_str) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/output/rawpy.py b/plaso/output/rawpy.py new file mode 100644 index 0000000..136dd0b --- /dev/null +++ b/plaso/output/rawpy.py @@ -0,0 +1,41 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Represents an EventObject as a string.""" +from plaso.lib import output + + +class Rawpy(output.FileLogOutputFormatter): + """Prints out a "raw" interpretation of the EventObject.""" + # TODO: Revisit the name of this class, perhaps rename it to + # something more closely similar to what it is doing now, as in + # "native" or something else. + + def EventBody(self, event_object): + """Prints out to a filehandle string representation of an EventObject. + + Each EventObject contains both attributes that are considered "reserved" + and others that aren't. The 'raw' representation of the object makes a + distinction between these two types as well as extracting the format + strings from the object. + + Args: + event_object: The EventObject. + """ + # TODO: Move the unicode cast into the event object itself, expose + # a ToString function or something similar that will send back the + # unicode string. + self.filehandle.WriteLine(unicode(event_object)) diff --git a/plaso/output/sqlite_4n6.py b/plaso/output/sqlite_4n6.py new file mode 100644 index 0000000..96cae45 --- /dev/null +++ b/plaso/output/sqlite_4n6.py @@ -0,0 +1,316 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import re +import sys + +import sqlite3 + +from plaso import formatters +from plaso.formatters import interface as formatters_interface +from plaso.formatters import manager as formatters_manager +from plaso.lib import errors +from plaso.lib import output +from plaso.lib import timelib +from plaso.lib import utils +from plaso.output import helper + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class Sql4n6(output.LogOutputFormatter): + """Saves the data in a SQLite database, used by the tool 4n6Time.""" + + FORMAT_ATTRIBUTE_RE = re.compile('{([^}]+)}') + + META_FIELDS = [ + 'sourcetype', 'source', 'user', 'host', 'MACB', 'color', 'type', + 'record_number'] + + def __init__(self, store, filehandle=sys.stdout, config=None, + filter_use=None): + """Constructor for the output module. + + Args: + store: The storage object. + filehandle: A file-like object that can be written to. + config: The configuration object for the module. + filter_use: The filter object used. + """ + # TODO: Add a unit test for this output module. + super(Sql4n6, self).__init__(store, filehandle, config, filter_use) + # TODO: move this to an output module interface. + self._formatters_manager = formatters_manager.EventFormatterManager + self.set_status = getattr(config, 'set_status', None) + + # TODO: Revisit handling this outside of plaso. 
+ self.dbname = filehandle + self.evidence = getattr(config, 'evidence', '-') + self.append = getattr(config, 'append', False) + self.fields = getattr(config, 'fields', [ + 'host', 'user', 'source', 'sourcetype', 'type', 'datetime', 'color']) + + # Override LogOutputFormatter methods so it won't write to the file + # handle any more. + def Start(self): + """Connect to the database and create the table before inserting.""" + if self.filehandle == sys.stdout: + raise IOError( + u'Unable to connect to stdout as database, please specify a file.') + + if (not self.append) and os.path.isfile(self.filehandle): + raise IOError(( + u'Unable to use an already existing file for output ' + u'[{0:s}]').format(self.filehandle)) + + self.conn = sqlite3.connect(self.dbname) + self.conn.text_factory = str + self.curs = self.conn.cursor() + + # Create table in database. + if not self.append: + self.curs.execute( + ('CREATE TABLE log2timeline (timezone TEXT, ' + 'MACB TEXT, source TEXT, sourcetype TEXT, type TEXT, ' + 'user TEXT, host TEXT, description TEXT, filename TEXT, ' + 'inode TEXT, notes TEXT, format TEXT, extra TEXT, ' + 'datetime datetime, reportnotes TEXT, ' + 'inreport TEXT, tag TEXT, color TEXT, offset INT,' + 'store_number INT, store_index INT, vss_store_number INT,' + 'url TEXT, record_number TEXT, event_identifier TEXT, ' + 'event_type TEXT, source_name TEXT, user_sid TEXT, ' + 'computer_name TEXT, evidence TEXT)')) + if self.set_status: + self.set_status('Created table: log2timeline') + + for field in self.META_FIELDS: + self.curs.execute( + 'CREATE TABLE l2t_{0}s ({0}s TEXT, frequency INT)'.format(field)) + if self.set_status: + self.set_status('Created table: l2t_{0:s}'.format(field)) + + self.curs.execute('CREATE TABLE l2t_tags (tag TEXT)') + if self.set_status: + self.set_status('Created table: l2t_tags') + + self.curs.execute('CREATE TABLE l2t_saved_query (name TEXT, query TEXT)') + if self.set_status: + self.set_status('Created table: l2t_saved_query') + + self.curs.execute('CREATE TABLE l2t_disk (disk_type INT, mount_path TEXT,' + ' dd_path TEXT, dd_offset TEXT, storage_file TEXT,' + ' export_path TEXT)') + self.curs.execute('INSERT INTO l2t_disk (disk_type, mount_path, dd_path,' + 'dd_offset, storage_file, export_path) VALUES ' + '(0, "", "", "", "", "")') + if self.set_status: + self.set_status('Created table: l2t_disk') + + self.count = 0 + + def End(self): + """Create indices and commit the transaction.""" + # Build up indices for the fields specified in the args. + # It will commit the inserts automatically before creating index. + if not self.append: + for field_name in self.fields: + sql = 'CREATE INDEX {0}_idx ON log2timeline ({0})'.format(field_name) + self.curs.execute(sql) + if self.set_status: + self.set_status('Created index: {0:s}'.format(field_name)) + + # Get meta info and save into their tables. 
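+    # Illustration (values are made up): after this step each l2t_<field>s
+    # table holds one row per distinct value of that field together with its
+    # frequency, e.g. l2t_sources could contain ('REG', 4023), ('LOG', 1311).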
+    if self.set_status:
+      self.set_status('Creating metadata...')
+
+    for field in self.META_FIELDS:
+      vals = self._GetDistinctValues(field)
+      self.curs.execute('DELETE FROM l2t_{0:s}s'.format(field))
+      for name, freq in vals.items():
+        self.curs.execute((
+            'INSERT INTO l2t_{0:s}s ({1:s}s, frequency) '
+            'VALUES("{2:s}", {3:d}) ').format(field, field, name, freq))
+    self.curs.execute('DELETE FROM l2t_tags')
+    for tag in self._ListTags():
+      self.curs.execute('INSERT INTO l2t_tags (tag) VALUES (?)', [tag])
+
+    if self.set_status:
+      self.set_status('Database created.')
+
+    self.conn.commit()
+    self.curs.close()
+    self.conn.close()
+
+  def _GetDistinctValues(self, field_name):
+    """Query database for unique field types."""
+    self.curs.execute(
+        u'SELECT {0}, COUNT({0}) FROM log2timeline GROUP BY {0}'.format(
+            field_name))
+    res = {}
+    for row in self.curs.fetchall():
+      if row[0] != '':
+        res[row[0]] = int(row[1])
+    return res
+
+  def _ListTags(self):
+    """Query database for unique tag types."""
+    all_tags = []
+    self.curs.execute(
+        'SELECT DISTINCT tag FROM log2timeline')
+
+    # This cleans up the messy SQL return.
+    for tag_row in self.curs.fetchall():
+      tag_string = tag_row[0]
+      if tag_string:
+        tags = tag_string.split(',')
+        for tag in tags:
+          if tag not in all_tags:
+            all_tags.append(tag)
+    return all_tags
+
+  def StartEvent(self):
+    """Do nothing, just override the parent's StartEvent method."""
+    pass
+
+  def EndEvent(self):
+    """Do nothing, just override the parent's EndEvent method."""
+    pass
+
+  def EventBody(self, event_object):
+    """Formats data as the 4n6time table format and writes it to the database.
+
+    Args:
+      event_object: The event object (EventObject).
+
+    Raises:
+      errors.NoFormatterFound: If no event formatter was found.
+    """
+    if 'timestamp' not in event_object.GetAttributes():
+      return
+
+    event_formatter = self._formatters_manager.GetFormatter(event_object)
+    if not event_formatter:
+      raise errors.NoFormatterFound(
+          'Unable to output event, no event formatter found.')
+
+    if (isinstance(
+        event_formatter, formatters.winreg.WinRegistryGenericFormatter) and
+        event_formatter.FORMAT_STRING.find('<|>') == -1):
+      event_formatter.FORMAT_STRING = u'[{keyname}]<|>{text}<|>'
+
+    elif isinstance(
+        event_formatter, formatters_interface.ConditionalEventFormatter):
+      event_formatter.FORMAT_STRING_SEPARATOR = u'<|>'
+
+    elif isinstance(event_formatter, formatters_interface.EventFormatter):
+      event_formatter.format_string = event_formatter.format_string.replace(
+          '}', '}<|>')
+
+    msg, _ = event_formatter.GetMessages(event_object)
+    source_short, source_long = event_formatter.GetSources(event_object)
+
+    date_use = timelib.Timestamp.CopyToDatetime(
+        event_object.timestamp, self.zone)
+    if not date_use:
+      logging.error(u'Unable to process date for entry: {0:s}'.format(msg))
+      return
+    extra = []
+    format_variables = self.FORMAT_ATTRIBUTE_RE.findall(
+        event_formatter.format_string)
+    for key in event_object.GetAttributes():
+      if key in utils.RESERVED_VARIABLES or key in format_variables:
+        continue
+      extra.append(u'{0:s}: {1!s} '.format(
+          key, getattr(event_object, key, None)))
+    extra = u' '.join(extra)
+
+    inode = getattr(event_object, 'inode', '-')
+    if inode == '-':
+      if (hasattr(event_object, 'pathspec') and
+          hasattr(event_object.pathspec, 'image_inode')):
+        inode = event_object.pathspec.image_inode
+
+    date_use_string = u'{0:04d}-{1:02d}-{2:02d} {3:02d}:{4:02d}:{5:02d}'.format(
+        date_use.year, date_use.month, date_use.day, date_use.hour,
+        date_use.minute,
date_use.second) + + tags = [] + if hasattr(event_object, 'tag'): + if hasattr(event_object.tag, 'tags'): + tags = event_object.tag.tags + taglist = ','.join(tags) + row = (str(self.zone), + helper.GetLegacy(event_object), + source_short, + source_long, + getattr(event_object, 'timestamp_desc', '-'), + getattr(event_object, 'username', '-'), + getattr(event_object, 'hostname', '-'), + msg, + getattr(event_object, 'filename', '-'), + inode, + getattr(event_object, 'notes', '-'), + getattr(event_object, 'parser', '-'), + extra, + date_use_string, + '', + '', + taglist, + '', + getattr(event_object, 'offset', 0), + event_object.store_number, + event_object.store_index, + GetVSSNumber(event_object), + getattr(event_object, 'url', '-'), + getattr(event_object, 'record_number', 0), + getattr(event_object, 'event_identifier', '-'), + getattr(event_object, 'event_type', '-'), + getattr(event_object, 'source_name', '-'), + getattr(event_object, 'user_sid', '-'), + getattr(event_object, 'computer_name', '-'), + self.evidence + ) + + self.curs.execute( + ('INSERT INTO log2timeline(timezone, MACB, source, ' + 'sourcetype, type, user, host, description, filename, ' + 'inode, notes, format, extra, datetime, reportnotes, inreport,' + 'tag, color, offset, store_number, store_index, vss_store_number,' + 'URL, record_number, event_identifier, event_type,' + 'source_name, user_sid, computer_name, evidence)' + ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,' + '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'), row) + + self.count += 1 + + # Commit the current transaction every 10000 inserts. + if self.count % 10000 == 0: + self.conn.commit() + if self.set_status: + self.set_status('Inserting event: {0:d}'.format(self.count)) + + +def GetVSSNumber(event_object): + """Return the vss_store_number of the event.""" + if not hasattr(event_object, 'pathspec'): + return -1 + + return getattr(event_object.pathspec, 'vss_store_number', -1) diff --git a/plaso/output/tln.py b/plaso/output/tln.py new file mode 100644 index 0000000..903ffaa --- /dev/null +++ b/plaso/output/tln.py @@ -0,0 +1,110 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains a class for outputting in a TLN format. + +Output module based on TLN as described by: +http://windowsir.blogspot.com/2010/02/timeline-analysisdo-we-need-standard.html + +Fields: + Time - 32 bit Unix epoch. + Source - The plugin that produced the data. + Host - The source host system. + User - The user associated with the data. + Description - Message string describing the data. 
+""" + +import logging + +from plaso.formatters import manager as formatters_manager +from plaso.lib import errors +from plaso.lib import output +from plaso.lib import timelib +from plaso.output import helper + + +class Tln(output.FileLogOutputFormatter): + """Five field TLN pipe delimited outputter.""" + + DELIMITER = u'|' + + def Start(self): + """Returns a header for the output.""" + # Build a hostname and username dict objects. + self._hostnames = {} + if self.store: + self._hostnames = helper.BuildHostDict(self.store) + self._preprocesses = {} + for info in self.store.GetStorageInformation(): + if hasattr(info, 'store_range'): + for store_number in range( + info.store_range[0], info.store_range[1] + 1): + self._preprocesses[store_number] = info + self.filehandle.WriteLine(u'Time|Source|Host|User|Description\n') + + def WriteEvent(self, event_object): + """Write a single event.""" + try: + self.EventBody(event_object) + except errors.NoFormatterFound: + logging.error(u'Unable to output line, no formatter found.') + logging.error(event_object.GetString()) + + def EventBody(self, event_object): + """Formats data as TLN and writes to the filehandle from OutputFormater. + + Args: + event_object: The event object (EventObject). + + Raises: + errors.NoFormatterFound: If no formatter for that event is found. + """ + if not hasattr(event_object, 'timestamp'): + return + + # TODO: move this to an output module interface. + event_formatter = formatters_manager.EventFormatterManager.GetFormatter( + event_object) + if not event_formatter: + raise errors.NoFormatterFound( + u'Unable to find event formatter for: {0:s}.'.format( + event_object.DATA_TYPE)) + + msg, _ = event_formatter.GetMessages(event_object) + source_short, _ = event_formatter.GetSources(event_object) + + date_use = timelib.Timestamp.CopyToPosix(event_object.timestamp) + hostname = getattr(event_object, 'hostname', u'') + username = getattr(event_object, 'username', u'') + + if self.store: + if not hostname: + hostname = self._hostnames.get(event_object.store_number, u'') + + pre_obj = self._preprocesses.get(event_object.store_number) + if pre_obj: + check_user = pre_obj.GetUsernameById(username) + if check_user != '-': + username = check_user + + out_write = u'{0!s}|{1:s}|{2:s}|{3:s}|{4!s}\n'.format( + date_use, + source_short.replace(self.DELIMITER, u' '), + hostname.replace(self.DELIMITER, u' '), + username.replace(self.DELIMITER, u' '), + msg.replace(self.DELIMITER, u' ')) + self.filehandle.WriteLine(out_write) diff --git a/plaso/output/tln_test.py b/plaso/output/tln_test.py new file mode 100644 index 0000000..8179a39 --- /dev/null +++ b/plaso/output/tln_test.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the TLN output class.""" + +import StringIO +import unittest + +from plaso.formatters import interface as formatters_interface +from plaso.lib import event +from plaso.output import tln + + +class TlnTestEvent(event.EventObject): + """Simplified EventObject for testing.""" + DATA_TYPE = 'test:l2ttln' + + def __init__(self): + """Initialize event with data.""" + super(TlnTestEvent, self).__init__() + self.timestamp = 1340821021000000 + self.hostname = u'ubuntu' + self.display_name = u'OS: log/syslog.1' + self.inode = 12345678 + self.text = ( + u'Reporter PID: |8442| (pam_unix(cron:session): session\n ' + u'closed for user root)') + self.username = u'root' + + +class L2TTlnTestEventFormatter(formatters_interface.EventFormatter): + """Formatter for the test event.""" + DATA_TYPE = 'test:l2ttln' + FORMAT_STRING = u'{text}' + SOURCE_SHORT = 'LOG' + SOURCE_LONG = 'Syslog' + + +class TlnTest(unittest.TestCase): + """Tests for the TLN outputter.""" + + def setUp(self): + """Sets up the objects needed for this test.""" + self.output = StringIO.StringIO() + self.formatter = tln.Tln(None, self.output) + self.event_object = TlnTestEvent() + + def testStart(self): + """Test ensures header line is outputted as expected.""" + correct_line = u'Time|Source|Host|User|Description\n' + + self.formatter.Start() + self.assertEquals(self.output.getvalue(), correct_line) + + def testEventBody(self): + """Test ensures that returned lines returned are formatted as TLN.""" + + self.formatter.EventBody(self.event_object) + correct = (u'1340821021|LOG|ubuntu|root|Reporter PID: 8442 ' + u'(pam_unix(cron:session): session closed for user root)\n') + self.assertEquals(self.output.getvalue(), correct) + + def testEventBodyNoStrayPipes(self): + """Test ensures that the only pipes are the four field delimiters.""" + + self.formatter.EventBody(self.event_object) + self.assertEquals(self.output.getvalue().count(u'|'), 4) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/__init__.py b/plaso/parsers/__init__.py new file mode 100644 index 0000000..76649f1 --- /dev/null +++ b/plaso/parsers/__init__.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains an import statement for each parser.""" + +from plaso.parsers import asl +from plaso.parsers import android_app_usage +from plaso.parsers import bencode_parser +from plaso.parsers import bsm +from plaso.parsers import chrome_cache +from plaso.parsers import cups_ipp +from plaso.parsers import custom_destinations +from plaso.parsers import esedb +from plaso.parsers import filestat +from plaso.parsers import firefox_cache +from plaso.parsers import hachoir +from plaso.parsers import iis +from plaso.parsers import java_idx +from plaso.parsers import mac_appfirewall +from plaso.parsers import mac_keychain +from plaso.parsers import mac_securityd +from plaso.parsers import mac_wifi +from plaso.parsers import mactime +from plaso.parsers import mcafeeav +from plaso.parsers import msiecf +from plaso.parsers import olecf +from plaso.parsers import opera +from plaso.parsers import oxml +from plaso.parsers import pcap +from plaso.parsers import plist +from plaso.parsers import popcontest +from plaso.parsers import pls_recall +from plaso.parsers import recycler +from plaso.parsers import rubanetra +from plaso.parsers import selinux +from plaso.parsers import skydrivelog +from plaso.parsers import skydrivelogerr +from plaso.parsers import sqlite +from plaso.parsers import symantec +from plaso.parsers import syslog +from plaso.parsers import utmp +from plaso.parsers import utmpx +from plaso.parsers import winevt +from plaso.parsers import winevtx +from plaso.parsers import winfirewall +from plaso.parsers import winjob +from plaso.parsers import winlnk +from plaso.parsers import winprefetch +from plaso.parsers import winreg +from plaso.parsers import xchatlog +from plaso.parsers import xchatscrollback + +# Register plugins. +from plaso.parsers import bencode_plugins +from plaso.parsers import esedb_plugins +from plaso.parsers import olecf_plugins +from plaso.parsers import plist_plugins +from plaso.parsers import sqlite_plugins +from plaso.parsers import winreg_plugins diff --git a/plaso/parsers/android_app_usage.py b/plaso/parsers/android_app_usage.py new file mode 100644 index 0000000..f9ca0ac --- /dev/null +++ b/plaso/parsers/android_app_usage.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for the Android usage-history.xml file.""" + +import os + +from xml.etree import ElementTree +from dfvfs.helpers import text_file + +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +class AndroidAppUsageEvent(event.EventObject): + """EventObject for an Android Application Last Resumed event.""" + + DATA_TYPE = 'android:event:last_resume_time' + + def __init__(self, last_resume_time, package, component): + """Initializes the event object. 
+
+    Args:
+      last_resume_time: The Last Resume Time of an Android App with details
+                        of individual components. The timestamp contains the
+                        number of milliseconds since Jan 1, 1970 00:00:00 UTC.
+      package: The name of the Android App.
+      component: The individual component of the App.
+    """
+    super(AndroidAppUsageEvent, self).__init__()
+    self.timestamp = timelib.Timestamp.FromJavaTime(last_resume_time)
+    self.package = package
+    self.component = component
+
+    self.timestamp_desc = eventdata.EventTimestamp.LAST_RESUME_TIME
+
+
+class AndroidAppUsageParser(interface.BaseParser):
+  """Parses the Android usage-history.xml file."""
+
+  NAME = 'android_app_usage'
+  DESCRIPTION = u'Parser for the Android usage-history.xml file.'
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract the Android usage-history file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    file_object = file_entry.GetFileObject()
+    file_object.seek(0, os.SEEK_SET)
+
+    text_file_object = text_file.TextFile(file_object)
+
+    # Need to verify the first line to make sure this is a) XML and
+    # b) the right XML.
+    first_line = text_file_object.readline(90)
+
+    # Note that we must check the data here as a string first, otherwise
+    # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
+    if not first_line.startswith('<?xml'):
+      raise errors.UnableToParseFile(
+          u'Not an Android usage history file [not XML]')
+
+    second_line = text_file_object.readline(50).strip()
+
+    # Note that we must check the data here as a string first, otherwise
+    # forcing second_line to convert to Unicode can raise a UnicodeDecodeError.
+    if second_line != '<usage-history>':
+      raise errors.UnableToParseFile(
+          u'Not an Android usage history file [wrong XML root key]')
+
+    # For ElementTree to work we need to work on a filehandle seeked
+    # to the beginning.
+    file_object.seek(0, os.SEEK_SET)
+
+    xml = ElementTree.parse(file_object)
+    root = xml.getroot()
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    for app in root:
+      for part in app.iter():
+        if part.tag == 'comp':
+          package = app.get(u'name', '')
+          component = part.get(u'name', '')
+
+          try:
+            last_resume_time = int(part.get('lrt', u''), 10)
+          except ValueError:
+            continue
+
+          event_object = AndroidAppUsageEvent(
+              last_resume_time, package, component)
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    file_object.close()
+
+
+manager.ParsersManager.RegisterParser(AndroidAppUsageParser)
diff --git a/plaso/parsers/android_app_usage_test.py b/plaso/parsers/android_app_usage_test.py
new file mode 100644
index 0000000..72874df
--- /dev/null
+++ b/plaso/parsers/android_app_usage_test.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
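+# For reference, the parser exercised below expects XML shaped roughly as
+# follows; the element names are an assumption reconstructed from the
+# attribute lookups in AndroidAppUsageParser.Parse(), and the values are
+# illustrative (the lrt value matches one of the test timestamps):
+#
+#   <?xml version="1.0"?>
+#   <usage-history>
+#     <pkg name="com.example.app">
+#       <comp name="com.example.app.MainActivity" lrt="1386617313047" />
+#     </pkg>
+#   </usage-history>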
+"""Tests for the Android Application Usage history parsers.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import android_app_usage as android_app_usage_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import android_app_usage +from plaso.parsers import test_lib + + +class AndroidAppUsageParserTest(test_lib.ParserTestCase): + """Tests for the Android Application Usage History parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = android_app_usage.AndroidAppUsageParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['usage-history.xml']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 28) + + event_object = event_objects[22] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-12-09 19:28:33.047000') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.component, + 'com.sec.android.widgetapp.ap.hero.accuweather.menu.MenuAdd') + + expected_msg = ( + u'Package: ' + u'com.sec.android.widgetapp.ap.hero.accuweather ' + u'Component: ' + u'com.sec.android.widgetapp.ap.hero.accuweather.menu.MenuAdd') + expected_msg_short = ( + u'Package: com.sec.android.widgetapp.ap.hero.accuweather ' + u'Component: com.sec.and...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[17] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-09-27 19:45:55.675000') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.package, 'com.google.android.gsf.login') + + expected_msg = ( + u'Package: ' + u'com.google.android.gsf.login ' + u'Component: ' + u'com.google.android.gsf.login.NameActivity') + expected_msg_short = ( + u'Package: com.google.android.gsf.login ' + u'Component: com.google.android.gsf.login...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/asl.py b/plaso/parsers/asl.py new file mode 100644 index 0000000..f970ba0 --- /dev/null +++ b/plaso/parsers/asl.py @@ -0,0 +1,412 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""The Apple System Log Parser.""" + +import construct +import logging +import os + +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + +# TODO: get the real name for the user of the group having the uid or gid. + + +class AslEvent(event.EventObject): + """Convenience class for an asl event.""" + + DATA_TYPE = 'mac:asl:event' + + def __init__( + self, timestamp, record_position, message_id, + level, record_header, read_uid, read_gid, computer_name, + sender, facility, message, extra_information): + """Initializes the event object. + + Args: + timestamp: timestamp of the entry. + record_position: position where the record start. + message_id: Identification value for an ASL message. + level: level of criticality. + record_header: header of the entry. + pid: identification number of the process. + uid: identification number of the owner of the process. + gid: identification number of the group of the process. + read_uid: the user ID that can read this file. If -1: all. + read_gid: the group ID that can read this file. If -1: all. + computer_name: name of the host. + sender: the process that insert the event. + facility: the part of the sender that create the event. + message: message of the event. + extra_information: extra fields associated to each entry. + """ + super(AslEvent, self).__init__() + self.pid = record_header.pid + self.user_sid = unicode(record_header.uid) + self.group_id = record_header.gid + self.timestamp = timestamp + self.timestamp_desc = eventdata.EventTimestamp.CREATION_TIME + self.record_position = record_position + self.message_id = message_id + self.level = level + self.read_uid = read_uid + self.read_gid = read_gid + self.computer_name = computer_name + self.sender = sender + self.facility = facility + self.message = message + self.extra_information = extra_information + + +class AslParser(interface.BaseParser): + """Parser for ASL log files.""" + + NAME = 'asl_log' + DESCRIPTION = u'Parser for ASL log files.' + + ASL_MAGIC = 'ASL DB\x00\x00\x00\x00\x00\x00' + + # If not right assigned, the value is "-1". + ASL_NO_RIGHTS = 'ffffffff' + + # Priority level (criticity) + ASL_MESSAGE_PRIORITY = { + 0 : 'EMERGENCY', + 1 : 'ALERT', + 2 : 'CRITICAL', + 3 : 'ERROR', + 4 : 'WARNING', + 5 : 'NOTICE', + 6 : 'INFO', + 7 : 'DEBUG'} + + # ASL File header. + # magic: magic number that identify ASL files. + # version: version of the file. + # offset: first record in the file. + # timestamp: epoch time when the first entry was written. + # last_offset: last record in the file. + ASL_HEADER_STRUCT = construct.Struct( + 'asl_header_struct', + construct.String('magic', 12), + construct.UBInt32('version'), + construct.UBInt64('offset'), + construct.UBInt64('timestamp'), + construct.UBInt32('cache_size'), + construct.UBInt64('last_offset'), + construct.Padding(36)) + + # The record structure is: + # [HEAP][STRUCTURE][4xExtraField][2xExtraField]*[PreviousEntry] + # Record static structure. + # tam_entry: it contains the number of bytes from this file position + # until the end of the record, without counts itself. + # next_offset: next record. If is equal to 0x00, it is the last record. + # asl_message_id: integer that has the numeric identification of the event. + # timestamp: Epoch integer that has the time when the entry was created. 
+  # nanosecond: nanosecond to add to the timestamp.
+  # level: level of priority.
+  # pid: process identification that asked to save the record.
+  # uid: user identification that launched the process.
+  # gid: group identification that launched the process.
+  # read_uid: identification id of a user. Only applies if it is not -1
+  #           (all FF). Only root and this user can read the entry.
+  # read_gid: the same as read_uid, but for the group.
+  ASL_RECORD_STRUCT = construct.Struct(
+      'asl_record_struct',
+      construct.Padding(2),
+      construct.UBInt32('tam_entry'),
+      construct.UBInt64('next_offset'),
+      construct.UBInt64('asl_message_id'),
+      construct.UBInt64('timestamp'),
+      construct.UBInt32('nanosec'),
+      construct.UBInt16('level'),
+      construct.UBInt16('flags'),
+      construct.UBInt32('pid'),
+      construct.UBInt32('uid'),
+      construct.UBInt32('gid'),
+      construct.UBInt32('read_uid'),
+      construct.UBInt32('read_gid'),
+      construct.UBInt64('ref_pid'))
+
+  ASL_RECORD_STRUCT_SIZE = ASL_RECORD_STRUCT.sizeof()
+
+  # 8-byte fields, they can be:
+  # - String: [Nibble = 1000 (8)][Nibble = Length][7 Bytes = String].
+  # - Integer: integer that has the byte position in the file that points
+  #            to an ASL_RECORD_DYN_VALUE struct. If the value of the integer
+  #            is equal to 0, it means that it has no data (skip).
+
+  # If the field is a String, we use this structure to decode each
+  # integer byte into the corresponding character (ASCII char).
+  ASL_OCTET_STRING = construct.ExprAdapter(
+      construct.Octet('string'),
+      encoder=lambda obj, ctx: ord(obj),
+      decoder=lambda obj, ctx: chr(obj))
+
+  # Field string structure. If the first bit is 1, it means that it
+  # is a String (1000) = 8, then the next nibble has the number of
+  # characters. The remaining 7 bytes contain the characters of the string.
+  ASL_STRING = construct.BitStruct(
+      'string',
+      construct.Flag('type'),
+      construct.Bits('filler', 3),
+      construct.If(
+          lambda ctx: ctx.type,
+          construct.Nibble('string_length')),
+      construct.If(
+          lambda ctx: ctx.type,
+          construct.Array(7, ASL_OCTET_STRING)))
+
+  # 8-byte pointer to a byte position in the file.
+  ASL_POINTER = construct.UBInt64('pointer')
+
+  # Dynamic data structure pointed to by a pointer that contains a String:
+  # [2 bytes padding][4 bytes length of String][String].
+  ASL_RECORD_DYN_VALUE = construct.Struct(
+      'asl_record_dyn_value',
+      construct.Padding(2),
+      construct.PascalString(
+          'value',
+          length_field=construct.UBInt32('length')))
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract entries from an ASL file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    file_object = file_entry.GetFileObject()
+    file_object.seek(0, os.SEEK_SET)
+
+    try:
+      header = self.ASL_HEADER_STRUCT.parse_stream(file_object)
+    except (IOError, construct.FieldError) as exception:
+      file_object.close()
+      raise errors.UnableToParseFile(
+          u'Unable to parse ASL Header with error: {0:s}.'.format(exception))
+
+    if header.magic != self.ASL_MAGIC:
+      file_object.close()
+      raise errors.UnableToParseFile(u'Not an ASL Header, unable to parse.')
+
+    # Get the first and the last entry.
+    offset = header.offset
+    old_offset = header.offset
+    last_offset_header = header.last_offset
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    # If the ASL file has entries.
+    if offset:
+      event_object, offset = self.ReadAslEvent(file_object, offset)
+      while event_object:
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+        # TODO: an anomaly object must be emitted once that is implemented.
+        # Sanity check, the last read element must be the same as
+        # indicated by the header.
+        if offset == 0 and old_offset != last_offset_header:
+          logging.warning(u'Parsing ended before the header ends.')
+        old_offset = offset
+        event_object, offset = self.ReadAslEvent(file_object, offset)
+
+    file_object.close()
+
+  def ReadAslEvent(self, file_object, offset):
+    """Returns an AslEvent from a single ASL entry.
+
+    Args:
+      file_object: a file-like object that points to an ASL file.
+      offset: offset where the static part of the entry starts.
+
+    Returns:
+      An event object constructed from a single ASL record.
+    """
+    # The heap of the entry is saved to try to avoid seeks (a performance
+    # issue). It has the real start position of the entry.
+    dynamic_start = file_object.tell()
+    dynamic_part = file_object.read(offset - file_object.tell())
+
+    if not offset:
+      return None, None
+
+    try:
+      record_header = self.ASL_RECORD_STRUCT.parse_stream(file_object)
+    except (IOError, construct.FieldError) as exception:
+      logging.warning(
+          u'Unable to parse ASL event with error: {0:s}'.format(exception))
+      return None, None
+
+    # The variable tam_fields is the real length of the dynamic fields.
+    # We have this: [Record_Struct] + [Dynamic_Fields] + [Pointer_Entry_Before]
+    # Record_Struct has a field called tam_entry, which contains the number
+    # of bytes from the position of that field until the end of the entry.
+    # The tam_entry is between the 2nd and the 6th byte in the
+    # [Record_Struct]:
+    # tam_entry = ([Record_Struct]-6)+[Dynamic_Fields]+[Pointer_Entry_Before]
+    # Also, we do not need [Pointer_Entry_Before], so we subtract its size,
+    # which is 8 bytes:
+    # [Dynamic_Fields] = tam_entry - [Record_Struct] + 6 - 8
+    # [Dynamic_Fields] = tam_entry - [Record_Struct] - 2
+    tam_fields = record_header.tam_entry - self.ASL_RECORD_STRUCT_SIZE - 2
+
+    # Dynamic part of the entry that contains a minimum of four fields of
+    # 8 bytes plus 2x[8 bytes] fields for each extra ASL_Field.
+    # The first four fields are always the Host, Sender, Facility and Message.
+    # After the first four fields, the entry might have extra ASL_Fields.
+    # For each extra ASL_field, it has a pair of 8-byte fields where the first
+    # 8 bytes contain the name of the extra ASL_field and the second 8 bytes
+    # contain the text of the extra field.
+    # Each of these 8-byte fields can be stored using one of three different
+    # types:
+    # - Null value ('0000000000000000'): nothing to do.
+    # - String: it is a string if the first bit = 1, i.e. the first
+    #           nibble = 8 (1000). The second nibble has the length of the
+    #           string. The next 7 bytes have the text characters of the
+    #           string, padded at the end with null characters: '0x00'.
+    #           Example: [8468 6964 6400 0000]
+    #                    [8] String, [4] length, value: [68 69 64 64] = hidd.
+    # - Pointer: static position in the file of a special struct
+    #            implemented as an ASL_RECORD_DYN_VALUE.
+    #            Example: [0000 0000 0000 0077]
+    #                     It points to the file position 0x077 that has a
+    #                     ASL_RECORD_DYN_VALUE structure.
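+    # A worked example of the string case, using the ASL_STRING struct
+    # defined above (illustrative; mirrors the [8468 6964 6400 0000]
+    # example from the previous comment):
+    #
+    #   field = self.ASL_STRING.parse('\x84hidd\x00\x00\x00')
+    #   ''.join(field.string[0:field.string_length])  # -> 'hidd'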
+    values = []
+    while tam_fields > 0:
+      try:
+        raw_field = file_object.read(8)
+      except (IOError, construct.FieldError) as exception:
+        logging.warning(
+            u'Unable to parse ASL event with error: {0:s}'.format(exception))
+        return None, None
+      try:
+        # Try to read as a String.
+        field = self.ASL_STRING.parse(raw_field)
+        values.append(''.join(field.string[0:field.string_length]))
+        # Go to parse the next extra field.
+        tam_fields -= 8
+        continue
+      except ValueError:
+        pass
+      # If it is not a string, it must be a pointer.
+      try:
+        field = self.ASL_POINTER.parse(raw_field)
+      except ValueError as exception:
+        logging.warning(
+            u'Unable to parse ASL event with error: {0:s}'.format(exception))
+        return None, None
+      if field != 0:
+        # The next IF ELSE is only for performance, avoiding seeks.
+        # If the pointer points to a lower position than where the current
+        # entry starts, it means that it points to a previous entry.
+        pos = field - dynamic_start
+        # Greater than or equal to 0 means that the data is in the current
+        # entry.
+        if pos >= 0:
+          try:
+            values.append((self.ASL_RECORD_DYN_VALUE.parse(
+                dynamic_part[pos:])).value.partition('\x00')[0])
+          except (IOError, construct.FieldError) as exception:
+            logging.warning(
+                u'Unable to parse ASL event with error: {0:s}'.format(
+                    exception))
+            return None, None
+        else:
+          # We only use the seek method if the pointer points to the
+          # heap of another entry.
+          main_position = file_object.tell()
+          # If the pointer is in a previous entry.
+          if main_position > field:
+            file_object.seek(field - main_position, os.SEEK_CUR)
+            try:
+              values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
+                  file_object)).value.partition('\x00')[0])
+            except (IOError, construct.FieldError):
+              logging.warning((
+                  u'The pointer at {0:d} (0x{0:x}) points to invalid '
+                  u'information.').format(
+                      main_position - self.ASL_POINTER.sizeof()))
+            # Come back to the position in the entry.
+            _ = file_object.read(main_position - file_object.tell())
+          else:
+            _ = file_object.read(field - main_position)
+            values.append((self.ASL_RECORD_DYN_VALUE.parse_stream(
+                file_object)).value.partition('\x00')[0])
+            # Come back to the position in the entry.
+            file_object.seek(main_position - file_object.tell(), os.SEEK_CUR)
+      # Next extra field: 8 bytes more.
+      tam_fields -= 8
+
+    # Read the last 8 bytes of the record that point to the previous entry.
+    _ = file_object.read(8)
+
+    # Parsed section: we translate the read data into an appropriate format.
+    microsecond = record_header.nanosec // 1000
+    timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
+        record_header.timestamp, microsecond)
+    record_position = offset
+    message_id = record_header.asl_message_id
+    level = u'{0} ({1})'.format(
+        self.ASL_MESSAGE_PRIORITY[record_header.level], record_header.level)
+    # If the value is -1 (FFFFFFFF), it can be read by everyone.
+    if record_header.read_uid != int(self.ASL_NO_RIGHTS, 16):
+      read_uid = record_header.read_uid
+    else:
+      read_uid = 'ALL'
+    if record_header.read_gid != int(self.ASL_NO_RIGHTS, 16):
+      read_gid = record_header.read_gid
+    else:
+      read_gid = 'ALL'
+
+    # Parsing the dynamic values (text or pointers to positions with text).
+    # The first four are always the host, sender, facility, and message.
+    computer_name = values[0]
+    sender = values[1]
+    facility = values[2]
+    message = values[3]
+
+    # If the entry has extra fields, they work as pairs:
+    # the first is the name of the field and the second its value.
+ extra_information = '' + if len(values) > 4: + values = values[4:] + for index in xrange(0, len(values) // 2): + extra_information += (u'[{0}: {1}]'.format( + values[index * 2], values[(index * 2) + 1])) + + # Return the event and the offset for the next entry. + return AslEvent( + timestamp, record_position, message_id, level, record_header, read_uid, + read_gid, computer_name, sender, facility, message, + extra_information), record_header.next_offset + + +manager.ParsersManager.RegisterParser(AslParser) diff --git a/plaso/parsers/asl_test.py b/plaso/parsers/asl_test.py new file mode 100644 index 0000000..68df720 --- /dev/null +++ b/plaso/parsers/asl_test.py @@ -0,0 +1,96 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Apple System Log file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import asl as asl_formatter +from plaso.lib import timelib_test +from plaso.parsers import asl +from plaso.parsers import test_lib + + +class AslParserTest(test_lib.ParserTestCase): + """Tests for Apple System Log file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = asl.AslParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['applesystemlog.asl']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 2) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-25 09:45:35.705481') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.record_position, 442) + self.assertEqual(event_object.message_id, 101406) + self.assertEqual(event_object.computer_name, u'DarkTemplar-2.local') + self.assertEqual(event_object.sender, u'locationd') + self.assertEqual(event_object.facility, u'com.apple.locationd') + self.assertEqual(event_object.pid, 69) + self.assertEqual(event_object.user_sid, u'205') + self.assertEqual(event_object.group_id, 205) + self.assertEqual(event_object.read_uid, 205) + self.assertEqual(event_object.read_gid, 'ALL') + self.assertEqual(event_object.level, u'WARNING (4)') + + expected_message = ( + u'Incorrect NSStringEncoding value 0x8000100 detected. ' + u'Assuming NSASCIIStringEncoding. 
Will stop this compatiblity ' + u'mapping behavior in the near future.') + + self.assertEqual(event_object.message, expected_message) + + expected_extra = ( + u'[CFLog Local Time: 2013-11-25 09:45:35.701]' + u'[CFLog Thread: 1007]' + u'[Sender_Mach_UUID: 50E1F76A-60FF-368C-B74E-EB48F6D98C51]') + + self.assertEqual(event_object.extra_information, expected_extra) + + expected_msg = ( + u'MessageID: 101406 ' + u'Level: WARNING (4) ' + u'User ID: 205 ' + u'Group ID: 205 ' + u'Read User: 205 ' + u'Read Group: ALL ' + u'Host: DarkTemplar-2.local ' + u'Sender: locationd ' + u'Facility: com.apple.locationd ' + u'Message: {0:s} {1:s}').format(expected_message, expected_extra) + + expected_msg_short = ( + u'Sender: locationd ' + u'Facility: com.apple.locationd') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/bencode_parser.py b/plaso/parsers/bencode_parser.py new file mode 100644 index 0000000..474c5ec --- /dev/null +++ b/plaso/parsers/bencode_parser.py @@ -0,0 +1,121 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Bencode Parser. + +Plaso's engine calls BencodeParser when it encounters bencoded files to be +processed, typically seen for BitTorrent data. +""" + +import logging +import re +import os + +import bencode + +from plaso.lib import errors +from plaso.parsers import interface +from plaso.parsers import manager + + +class BencodeParser(interface.BasePluginsParser): + """Deserializes bencoded file; produces a dictionary containing bencoded data. + + The Plaso engine calls parsers by their Parse() method. This parser's + Parse() has GetTopLevel() which deserializes bencoded files using the + BitTorrent-bencode library and calls plugins (BencodePlugin) registered + through the interface by their Process() to produce event objects. + + Plugins are how this parser understands the content inside a bencoded file, + each plugin holds logic specific to a particular bencoded file. See the + bencode_plugins / directory for examples of how bencode plugins are + implemented. + """ + + # Regex match for a bencode dictionary followed by a field size. + BENCODE_RE = re.compile('d[0-9]') + + NAME = 'bencode' + DESCRIPTION = u'Parser for bencoded files.' + + _plugin_classes = {} + + def __init__(self): + """Initializes a parser object.""" + super(BencodeParser, self).__init__() + self._plugins = BencodeParser.GetPluginObjects() + + def GetTopLevel(self, file_object): + """Returns deserialized content of a bencoded file as a dictionary object. + + Args: + file_object: A file-like object. + + Returns: + Dictionary object representing the contents of the bencoded file. 
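+
+    Raises:
+      errors.UnableToParseFile: If the file cannot be read or does not
+          contain valid bencoded data.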
+ """ + header = file_object.read(2) + file_object.seek(0, os.SEEK_SET) + + if not self.BENCODE_RE.match(header): + raise errors.UnableToParseFile(u'Not a valid Bencoded file.') + + try: + data_object = bencode.bdecode(file_object.read()) + except (IOError, bencode.BTFailure) as exception: + raise errors.UnableToParseFile( + u'Unable to parse invalid Bencoded file with error: {0:s}'.format( + exception)) + + if not data_object: + raise errors.UnableToParseFile(u'Not a valid Bencoded file.') + + return data_object + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Parse and extract values from a bencoded file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + data_object = self.GetTopLevel(file_object) + + if not data_object: + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse: {1:s}. Skipping.'.format( + self.NAME, file_entry.name)) + + parser_chain = self._BuildParserChain(parser_chain) + for plugin_object in self._plugins: + try: + plugin_object.Process( + parser_context, data=data_object, file_entry=file_entry, + parser_chain=parser_chain) + + except errors.WrongBencodePlugin as exception: + logging.debug(u'[{0:s}] wrong plugin: {1:s}'.format( + self.NAME, exception)) + + file_object.close() + + +manager.ParsersManager.RegisterParser(BencodeParser) diff --git a/plaso/parsers/bencode_parser_test.py b/plaso/parsers/bencode_parser_test.py new file mode 100644 index 0000000..7e578ec --- /dev/null +++ b/plaso/parsers/bencode_parser_test.py @@ -0,0 +1,143 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for Bencode file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import bencode_parser as bencode_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import bencode_parser +from plaso.parsers import test_lib + + +class BencodeTest(test_lib.ParserTestCase): + """Tests for Bencode file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = bencode_parser.BencodeParser() + + # TODO: Move this to bencode_plugins/tranmission_test.py + def testTransmissionPlugin(self): + """Read Transmission activity files and make few tests.""" + test_file = self._GetTestFilePath(['bencode_transmission']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 3) + + event_object = event_objects[0] + + destination_expected = u'/Users/brian/Downloads' + self.assertEqual(event_object.destination, destination_expected) + + self.assertEqual(event_object.seedtime, 4) + + description_expected = eventdata.EventTimestamp.ADDED_TIME + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-08 15:31:20') + self.assertEqual(event_object.timestamp, expected_timestamp) + + # Test on second event of first torrent. + event_object = event_objects[1] + self.assertEqual(event_object.destination, destination_expected) + self.assertEqual(event_object.seedtime, 4) + + description_expected = eventdata.EventTimestamp.FILE_DOWNLOADED + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-08 18:24:24') + self.assertEqual(event_object.timestamp, expected_timestamp) + + def testUTorrentPlugin(self): + """Parse a uTorrent resume.dat file and make a few tests.""" + test_file = self._GetTestFilePath(['bencode_utorrent']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 4) + + caption_expected = u'plaso test' + path_expected = u'e:\\torrent\\files\\plaso test' + + # First test on when the torrent was added to the client. + event_object = event_objects[3] + + self.assertEqual(event_object.caption, caption_expected) + + self.assertEqual(event_object.path, path_expected) + + self.assertEqual(event_object.seedtime, 511) + + description_expected = eventdata.EventTimestamp.ADDED_TIME + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-08-03 14:52:12') + self.assertEqual(event_object.timestamp, expected_timestamp) + + # Second test on when the torrent file was completely downloaded. + event_object = event_objects[2] + + self.assertEqual(event_object.caption, caption_expected) + self.assertEqual(event_object.path, path_expected) + self.assertEqual(event_object.seedtime, 511) + + description_expected = eventdata.EventTimestamp.FILE_DOWNLOADED + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-08-03 18:11:35') + self.assertEqual(event_object.timestamp, expected_timestamp) + + # Third test on when the torrent was first modified. 
+ event_object = event_objects[0] + + self.assertEqual(event_object.caption, caption_expected) + self.assertEqual(event_object.path, path_expected) + self.assertEqual(event_object.seedtime, 511) + + description_expected = eventdata.EventTimestamp.MODIFICATION_TIME + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-08-03 18:11:34') + self.assertEqual(event_object.timestamp, expected_timestamp) + + # Fourth test on when the torrent was again modified. + event_object = event_objects[1] + + self.assertEqual(event_object.caption, caption_expected) + self.assertEqual(event_object.path, path_expected) + self.assertEqual(event_object.seedtime, 511) + + description_expected = eventdata.EventTimestamp.MODIFICATION_TIME + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-08-03 16:27:59') + self.assertEqual(event_object.timestamp, expected_timestamp) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/bencode_plugins/__init__.py b/plaso/parsers/bencode_plugins/__init__.py new file mode 100644 index 0000000..8274cec --- /dev/null +++ b/plaso/parsers/bencode_plugins/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each bencode related plugin.""" + +from plaso.parsers.bencode_plugins import transmission +from plaso.parsers.bencode_plugins import utorrent diff --git a/plaso/parsers/bencode_plugins/interface.py b/plaso/parsers/bencode_plugins/interface.py new file mode 100644 index 0000000..c5062aa --- /dev/null +++ b/plaso/parsers/bencode_plugins/interface.py @@ -0,0 +1,205 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""bencode_interface contains basic interface for bencode plugins within Plaso. + +Bencoded files are only one example of a type of object that the Plaso tool is +expected to encounter and process. There can be and are many other parsers +which are designed to process specific data types. 
+
+BencodePlugin defines the attributes necessary for registration, discovery
+and operation of plugins for bencoded files which will be used by
+BencodeParser.
+"""
+
+import abc
+import logging
+
+from plaso.lib import errors
+from plaso.parsers import plugins
+
+
+class BencodePlugin(plugins.BasePlugin):
+  """This is an abstract class from which plugins should be based."""
+
+  # BENCODE_KEYS is a frozenset of keys required by a plugin.
+  # This is expected to be overridden by the processing plugin.
+  # Ex. frozenset(['activity-date', 'done-date'])
+  BENCODE_KEYS = frozenset(['any'])
+
+  # This is expected to be overridden by the processing plugin.
+  # URLS should contain a list of URLs with additional information about
+  # this key or value.
+  # Ex. ['https://wiki.theory.org/BitTorrentSpecification#Bencoding']
+  URLS = []
+
+  NAME = 'bencode'
+
+  def _GetKeys(self, data, keys, depth=1):
+    """Helper function to return keys nested in a bencode dict.
+
+    By default this function will return the values for the named keys
+    requested by a plugin in match{}. The default setting is to look a
+    single layer down from the root (same as the check for plugin
+    applicability). This level is suitable for most cases.
+
+    For cases where there is variability in the name at the first level
+    (e.g. it is the MAC address of a device, or a UUID) it is possible to
+    override the depth limit and use _GetKeys to fetch from a deeper level.
+
+    Args:
+      data: bencode data in dictionary form.
+      keys: A list of keys that should be returned.
+      depth: Defines how many levels deep to check for a match.
+
+    Returns:
+      A dictionary with just the keys requested.
+    """
+    keys = set(keys)
+    match = {}
+
+    if depth == 1:
+      for key in keys:
+        match[key] = data[key]
+    else:
+      for _, parsed_key, parsed_value in self._RecurseKey(
+          data, depth=depth):
+        if parsed_key in keys:
+          match[parsed_key] = parsed_value
+          if set(match.keys()) == keys:
+            return match
+    return match
+
+  def _RecurseKey(self, recur_item, root='', depth=15):
+    """Flattens nested dictionaries and lists by yielding their values.
+
+    The hierarchy of a bencode file is a series of nested dictionaries and
+    lists. This helper function helps plugins navigate the structure
+    without having to reimplement their own recursive methods.
+
+    This method implements an overridable depth limit to prevent processing
+    extremely deeply nested dictionaries. If the limit is reached a debug
+    message is logged indicating which key processing stopped on.
+
+    Args:
+      recur_item: An object to be checked for additional nested items.
+      root: The pathname of the current working key.
+      depth: A counter to ensure we stop at the maximum recursion depth.
+
+    Yields:
+      A tuple of the root, key, and value from a bencoded file.
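+
+      Example (illustrative): for {'a': {'b': 1}} this yields
+      ('', 'a', {'b': 1}) and then ('/a', 'b', 1).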
+ """ + if depth < 1: + logging.debug(u'Recursion limit hit for key: {0:s}'.format(root)) + return + + if type(recur_item) in (list, tuple): + for recur in recur_item: + for key in self._RecurseKey(recur, root, depth): + yield key + return + + if not hasattr(recur_item, 'iteritems'): + return + + for key, value in recur_item.iteritems(): + yield root, key, value + if isinstance(value, dict): + value = [value] + if isinstance(value, list): + for item in value: + if isinstance(item, dict): + for keyval in self._RecurseKey( + item, root=root + u'/' + key, depth=depth - 1): + yield keyval + + @abc.abstractmethod + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, data=None, + match=None, **kwargs): + """Extracts event object from the values of entries within a bencoded file. + + This is the main method that a bencode plugin needs to implement. + + The contents of the bencode keys defined in BENCODE_KEYS can be made + available to the plugin as both a matched{'KEY': 'value'} and as the + entire bencoded data dictionary. The plugin should implement logic to parse + the most relevant data set into a useful event for incorporation into the + Plaso timeline. + + The attributes for a BencodeEvent should include the following: + root = Root key this event was extracted from. + key = Key the value resided in. + time = Date this artifact was created in microseconds(usec) from epoch. + desc = Short description. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + data: Bencode data in dictionary form. The default is None. + match: Optional dictionary containing only the keys selected in the + BENCODE_KEYS. The default is None. + """ + + def Process( + self, parser_context, file_entry=None, parser_chain=None, + data=None, **kwargs): + """Determine if this is the correct plugin; if so proceed with processing. + + Process() checks if the current bencode file being processed is a match for + a plugin by comparing the PATH and KEY requirements defined by a plugin. If + both match processing continues; else raise WrongBencodePlugin. + + This function also extracts the required keys as defined in + self.BENCODE_KEYS from the file and stores the result in match[key] + and calls self.GetEntries() which holds the processing logic implemented by + the plugin. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + data: Bencode data in dictionary form. The default is None. + + Raises: + WrongBencodePlugin: If this plugin is not able to process the given file. + ValueError: If top level is not set. + """ + if data is None: + raise ValueError(u'Data is not set.') + + if not set(data.keys()).issuperset(self.BENCODE_KEYS): + raise errors.WrongBencodePlugin(self.NAME) + + # This will raise if unhandled keyword arguments are passed. + super(BencodePlugin, self).Process(parser_context, **kwargs) + + logging.debug(u'Bencode Plugin Used: {0:s}'.format(self.NAME)) + match = self._GetKeys(data, self.BENCODE_KEYS, 3) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. 
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    self.GetEntries(
+        parser_context, file_entry=file_entry, parser_chain=parser_chain,
+        data=data, match=match)
diff --git a/plaso/parsers/bencode_plugins/test_lib.py b/plaso/parsers/bencode_plugins/test_lib.py
new file mode 100644
index 0000000..caeeadd
--- /dev/null
+++ b/plaso/parsers/bencode_plugins/test_lib.py
@@ -0,0 +1,24 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Bencode plugin related functions and classes for testing."""
+
+from plaso.parsers import test_lib
+
+
+class BencodePluginTestCase(test_lib.ParserTestCase):
+  """The unit test case for a bencode plugin."""
diff --git a/plaso/parsers/bencode_plugins/transmission.py b/plaso/parsers/bencode_plugins/transmission.py
new file mode 100644
index 0000000..63661f8
--- /dev/null
+++ b/plaso/parsers/bencode_plugins/transmission.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a bencode plugin for Transmission BitTorrent data."""
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.parsers import bencode_parser
+from plaso.parsers.bencode_plugins import interface
+
+
+class TransmissionEvent(time_events.PosixTimeEvent):
+  """Convenience class for a Transmission BitTorrent activity event."""
+
+  DATA_TYPE = 'p2p:bittorrent:transmission'
+
+  def __init__(self, timestamp, timestamp_description, destination, seedtime):
+    """Initializes the event.
+
+    Args:
+      timestamp: The POSIX timestamp of the event.
+      timestamp_description: A short description of the meaning of the
+                             timestamp.
+      destination: Downloaded file name within the .torrent file.
+      seedtime: Number of seconds the client seeded the torrent.
+    """
+    super(TransmissionEvent, self).__init__(timestamp, timestamp_description)
+    self.destination = destination
+    self.seedtime = seedtime // 60  # Convert seconds to minutes.
+
+
+class TransmissionPlugin(interface.BencodePlugin):
+  """Parse Transmission BitTorrent activity file for current torrents."""
+
+  NAME = 'bencode_transmission'
+  DESCRIPTION = u'Parser for Transmission bencoded files.'
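+
+  # A Transmission resume file is a bencoded dictionary; all of the keys
+  # below must be present for this plugin to claim the file. A sketch of
+  # the relevant subset (values are illustrative, loosely based on the
+  # accompanying bencode_parser_test.py data):
+  #
+  #   {'added-date': 1383924680, 'done-date': 1383935064,
+  #    'activity-date': 1383935100, 'destination': '/Users/brian/Downloads',
+  #    'seeding-time-seconds': 240}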
+
+  BENCODE_KEYS = frozenset([
+      'activity-date', 'done-date', 'added-date', 'destination',
+      'seeding-time-seconds'])
+
+  def GetEntries(
+      self, parser_context, file_entry=None, parser_chain=None, data=None,
+      **unused_kwargs):
+    """Extract data from Transmission's resume folder files.
+
+    This is the main parsing engine for the parser. It determines if
+    the selected file is the proper file to parse and extracts current
+    running torrents.
+
+    Transmission stores an individual Bencoded file for each active download
+    in a folder named resume under the user's application data folder.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      data: Optional bencode data in dictionary form. The default is None.
+    """
+    # Place the obtained values into the event.
+    destination = data.get('destination', None)
+    seeding_time = data.get('seeding-time-seconds', None)
+
+    # Create timeline events based on extracted values.
+    if data.get('added-date', 0):
+      event_object = TransmissionEvent(
+          data.get('added-date'), eventdata.EventTimestamp.ADDED_TIME,
+          destination, seeding_time)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    if data.get('done-date', 0):
+      event_object = TransmissionEvent(
+          data.get('done-date'), eventdata.EventTimestamp.FILE_DOWNLOADED,
+          destination, seeding_time)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    if data.get('activity-date', None):
+      event_object = TransmissionEvent(
+          data.get('activity-date'), eventdata.EventTimestamp.ACCESS_TIME,
+          destination, seeding_time)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+bencode_parser.BencodeParser.RegisterPlugin(TransmissionPlugin)
diff --git a/plaso/parsers/bencode_plugins/utorrent.py b/plaso/parsers/bencode_plugins/utorrent.py
new file mode 100644
index 0000000..c5a9168
--- /dev/null
+++ b/plaso/parsers/bencode_plugins/utorrent.py
@@ -0,0 +1,137 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a bencode plugin for uTorrent data."""
+
+from plaso.events import time_events
+from plaso.lib import errors
+from plaso.lib import eventdata
+from plaso.parsers import bencode_parser
+from plaso.parsers.bencode_plugins import interface
+
+
+class UTorrentEvent(time_events.PosixTimeEvent):
+  """Convenience class for uTorrent active torrent history entries."""
+
+  DATA_TYPE = 'p2p:bittorrent:utorrent'
+
+  def __init__(
+      self, timestamp, timestamp_description, path, caption, seedtime):
+    """Initializes the event.
+ + Args: + path: Torrent download location + caption: Official name of package + seedtime: Number of seconds client seeded torrent + """ + super(UTorrentEvent, self).__init__(timestamp, timestamp_description) + self.path = path + self.caption = caption + self.seedtime = seedtime // 60 # Convert seconds to minutes. + + +class UTorrentPlugin(interface.BencodePlugin): + """Plugin to extract uTorrent active torrent events.""" + + NAME = 'bencode_utorrent' + DESCRIPTION = u'Parser for uTorrent bencoded files.' + + # The following set is used to determine if the bencoded data is appropriate + # for this plugin. If there's a match, the entire bencoded data block is + # returned for analysis. + BENCODE_KEYS = frozenset(['.fileguard']) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, data=None, + **unused_kwargs): + """Extracts uTorrent active torrents. + + This is the main parsing engine for the plugin. It determines if + the selected file is the proper file to parse and extracts current + running torrents. + + interface.Process() checks for the given BENCODE_KEYS set, ensures + that it matches, and then passes the bencoded data to this function for + parsing. This plugin then parses the entire set of bencoded data to extract + the variable file-name keys to retrieve their values. + + uTorrent creates a file, resume.dat, and a backup, resume.dat.old, to + for all active torrents. This is typically stored in the user's + application data folder. + + These files, at a minimum, contain a '.fileguard' key and a dictionary + with a key name for a particular download with a '.torrent' file + extension. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + data: Optional bencode data in dictionary form. The default is None. + """ + # Walk through one of the torrent keys to ensure it's from a valid file. + for key, value in data.iteritems(): + if not u'.torrent' in key: + continue + + caption = value.get('caption') + path = value.get('path') + seedtime = value.get('seedtime') + if not caption or not path or seedtime < 0: + raise errors.WrongBencodePlugin(self.NAME) + + for torrent, value in data.iteritems(): + if not u'.torrent' in torrent: + continue + + path = value.get('path', None) + caption = value.get('caption', None) + seedtime = value.get('seedtime', None) + + # Create timeline events based on extracted values. + for event_key, event_value in value.iteritems(): + if event_key == 'added_on': + event_object = UTorrentEvent( + event_value, eventdata.EventTimestamp.ADDED_TIME, + path, caption, seedtime) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + elif event_key == 'completed_on': + event_object = UTorrentEvent( + event_value, eventdata.EventTimestamp.FILE_DOWNLOADED, + path, caption, seedtime) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + elif event_key == 'modtimes': + for modtime in event_value: + # Some values are stored as 0, skip those. 
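# Editor's note, not part of the original change: an illustrative shape of
# a decoded resume.dat, with fabricated values, to make the loops above
# concrete. The outer iteration walks the '.torrent' keys and this inner
# iteration walks each torrent's timestamp fields, skipping zero values
# as the comment above describes:
#
#   data = {
#       '.fileguard': 'AABBCCDD',
#       'ubuntu.iso.torrent': {
#           'caption': 'ubuntu.iso',
#           'path': 'C:\\Downloads\\ubuntu.iso',
#           'seedtime': 1800,
#           'added_on': 1392638400,
#           'completed_on': 1392642000,
#           'modtimes': [0, 1392642000]}}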
+ if not modtime: + continue + + event_object = UTorrentEvent( + modtime, eventdata.EventTimestamp.MODIFICATION_TIME, + path, caption, seedtime) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, + file_entry=file_entry) + + +bencode_parser.BencodeParser.RegisterPlugin(UTorrentPlugin) diff --git a/plaso/parsers/bsm.py b/plaso/parsers/bsm.py new file mode 100644 index 0000000..1299426 --- /dev/null +++ b/plaso/parsers/bsm.py @@ -0,0 +1,1145 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Basic Security Module Parser.""" + +import binascii +import construct +import logging +import os +import socket + +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.unix import bsmtoken +from plaso.parsers import interface +from plaso.parsers import manager + +import pytz + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +# Note that we're using Array and a helper function here instead of +# PascalString because the latter seems to break pickling on Windows. + +def _BsmTokenGetLength(context): + """Contruct context parser helper function to replace lambda.""" + return context.length + + +# Note that we're using RepeatUntil and a helper function here instead of +# CString because the latter seems to break pickling on Windows. + +def _BsmTokenIsEndOfString(value, unused_context): + """Construct context parser helper function to replace lambda.""" + return value == '\x00' + + +# Note that we're using Switch and a helper function here instead of +# IfThenElse because the latter seems to break pickling on Windows. + +def _BsmTokenGetNetType(context): + """Construct context parser helper function to replace lambda.""" + return context.net_type + + +def _BsmTokenGetSocketDomain(context): + """Construct context parser helper function to replace lambda.""" + return context.socket_domain + + +class MacBsmEvent(event.EventObject): + """Convenience class for a Mac OS X BSM event.""" + + DATA_TYPE = 'mac:bsm:event' + + def __init__( + self, event_type, timestamp, extra_tokens, + return_value, record_length, offset): + """Initializes the event object. + + Args: + event_type: String with the text and ID that represents the event type. + timestamp: Entry Epoch timestamp in UTC. + extra_tokens: List of the extra tokens of the entry. + return_value: String with the process return value and exit status. + record_length: Record length in bytes (trailer number). + offset: The offset in bytes to where the record starts in the file. 
+ """ + super(MacBsmEvent, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = eventdata.EventTimestamp.CREATION_TIME + self.event_type = event_type + self.extra_tokens = extra_tokens + self.return_value = return_value + self.record_length = record_length + self.offset = offset + + +class BsmEvent(event.EventObject): + """Convenience class for a Generic BSM event.""" + + DATA_TYPE = 'bsm:event' + + def __init__( + self, event_type, timestamp, extra_tokens, record_length, offset): + """Initializes the event object. + + Args: + event_type: Text and integer ID that represents the type of the event. + timestamp: Timestamp of the entry. + extra_tokens: List of the extra tokens of the entry. + record_length: Record length in bytes (trailer number). + offset: The offset in bytes to where the record starts in the file. + """ + super(BsmEvent, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = eventdata.EventTimestamp.CREATION_TIME + self.event_type = event_type + self.extra_tokens = extra_tokens + self.record_length = record_length + self.offset = offset + + +class BsmParser(interface.BaseParser): + """Parser for BSM files.""" + + NAME = 'bsm_log' + DESCRIPTION = u'Parser for BSM log files.' + + # BSM supported version (0x0b = 11). + AUDIT_HEADER_VERSION = 11 + + # Magic Trail Header. + BSM_TOKEN_TRAILER_MAGIC = 'b105' + + # IP Version constants. + AU_IPv4 = 4 + AU_IPv6 = 16 + + IPV4_STRUCT = construct.UBInt32('ipv4') + + IPV6_STRUCT = construct.Struct( + 'ipv6', construct.UBInt64('high'), construct.UBInt64('low')) + + # Tested structures. + # INFO: I have ommited the ID in the structures declaration. + # I used the BSM_TYPE first to read the ID, and then, the structure. + # Tokens always start with an ID value that identifies their token + # type and subsequent structure. + BSM_TYPE = construct.UBInt8('token_id') + + # Data type structures. + BSM_TOKEN_DATA_CHAR = construct.String('value', 1) + BSM_TOKEN_DATA_SHORT = construct.UBInt16('value') + BSM_TOKEN_DATA_INTEGER = construct.UBInt32('value') + + # Common structure used by other structures. + # audit_uid: integer, uid that generates the entry. + # effective_uid: integer, the permission user used. + # effective_gid: integer, the permission group used. + # real_uid: integer, user id of the user that execute the process. + # real_gid: integer, group id of the group that execute the process. + # pid: integer, identification number of the process. + # session_id: unknown, need research. + BSM_TOKEN_SUBJECT_SHORT = construct.Struct( + 'subject_data', + construct.UBInt32('audit_uid'), + construct.UBInt32('effective_uid'), + construct.UBInt32('effective_gid'), + construct.UBInt32('real_uid'), + construct.UBInt32('real_gid'), + construct.UBInt32('pid'), + construct.UBInt32('session_id')) + + # Common structure used by other structures. + # Identify the kind of inet (IPv4 or IPv6) + # TODO: instead of 16, AU_IPv6 must be used. + BSM_IP_TYPE_SHORT = construct.Struct( + 'bsm_ip_type_short', + construct.UBInt32('net_type'), + construct.Switch( + 'ip_addr', + _BsmTokenGetNetType, + {16: IPV6_STRUCT}, + default=IPV4_STRUCT)) + + # Initial fields structure used by header structures. + # length: integer, the length of the entry, equal to trailer (doc: length). + # version: integer, version of BSM (AUDIT_HEADER_VERSION). + # event_type: integer, the type of event (/etc/security/audit_event). + # modifier: integer, unknown, need research (It is always 0). 
+ BSM_HEADER = construct.Struct( + 'bsm_header', + construct.UBInt32('length'), + construct.UBInt8('version'), + construct.UBInt16('event_type'), + construct.UBInt16('modifier')) + + # First token of one entry. + # timestamp: integer, Epoch timestamp of the entry. + # microsecond: integer, the microsecond of the entry. + BSM_HEADER32 = construct.Struct( + 'bsm_header32', + BSM_HEADER, + construct.UBInt32('timestamp'), + construct.UBInt32('microsecond')) + + BSM_HEADER64 = construct.Struct( + 'bsm_header64', + BSM_HEADER, + construct.UBInt64('timestamp'), + construct.UBInt64('microsecond')) + + BSM_HEADER32_EX = construct.Struct( + 'bsm_header32_ex', + BSM_HEADER, + BSM_IP_TYPE_SHORT, + construct.UBInt32('timestamp'), + construct.UBInt32('microsecond')) + + # Token TEXT, provides extra information. + BSM_TOKEN_TEXT = construct.Struct( + 'bsm_token_text', + construct.UBInt16('length'), + construct.Array( + _BsmTokenGetLength, + construct.UBInt8('text'))) + + # Path of the executable. + BSM_TOKEN_PATH = BSM_TOKEN_TEXT + + # Identified the end of the record (follow by TRAILER). + # status: integer that identifies the status of the exit (BSM_ERRORS). + # return: returned value from the operation. + BSM_TOKEN_RETURN32 = construct.Struct( + 'bsm_token_return32', + construct.UBInt8('status'), + construct.UBInt32('return_value')) + + BSM_TOKEN_RETURN64 = construct.Struct( + 'bsm_token_return64', + construct.UBInt8('status'), + construct.UBInt64('return_value')) + + # Identified the number of bytes that was written. + # magic: 2 bytes that identifes the TRAILER (BSM_TOKEN_TRAILER_MAGIC). + # length: integer that has the number of bytes from the entry size. + BSM_TOKEN_TRAILER = construct.Struct( + 'bsm_token_trailer', + construct.UBInt16('magic'), + construct.UBInt32('record_length')) + + # A 32-bits argument. + # num_arg: the number of the argument. + # name_arg: the argument's name. + # text: the string value of the argument. + BSM_TOKEN_ARGUMENT32 = construct.Struct( + 'bsm_token_argument32', + construct.UBInt8('num_arg'), + construct.UBInt32('name_arg'), + construct.UBInt16('length'), + construct.Array( + _BsmTokenGetLength, + construct.UBInt8('text'))) + + # A 64-bits argument. + # num_arg: integer, the number of the argument. + # name_arg: text, the argument's name. + # text: the string value of the argument. + BSM_TOKEN_ARGUMENT64 = construct.Struct( + 'bsm_token_argument64', + construct.UBInt8('num_arg'), + construct.UBInt64('name_arg'), + construct.UBInt16('length'), + construct.Array( + _BsmTokenGetLength, + construct.UBInt8('text'))) + + # Identify an user. + # terminal_id: unknown, research needed. + # terminal_addr: unknown, research needed. + BSM_TOKEN_SUBJECT32 = construct.Struct( + 'bsm_token_subject32', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt32('terminal_port'), + IPV4_STRUCT) + + # Identify an user using a extended Token. + # terminal_port: unknown, need research. + # net_type: unknown, need research. + BSM_TOKEN_SUBJECT32_EX = construct.Struct( + 'bsm_token_subject32_ex', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt32('terminal_port'), + BSM_IP_TYPE_SHORT) + + # au_to_opaque // AUT_OPAQUE + BSM_TOKEN_OPAQUE = BSM_TOKEN_TEXT + + # au_to_seq // AUT_SEQ + BSM_TOKEN_SEQUENCE = BSM_TOKEN_DATA_INTEGER + + # Program execution with options. + # For each argument we are going to have a string+ "\x00". + # Example: [00 00 00 02][41 42 43 00 42 42 00] + # 2 Arguments, Arg1: [414243] Arg2: [4242]. 
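# Editor's note: a standalone sketch, not part of the original change,
# decoding the example layout above with only the standard library: a
# 32-bit big-endian argument count followed by NUL-terminated strings:
#
#   import struct
#
#   payload = '\x00\x00\x00\x02ABC\x00BB\x00'
#   number_of_arguments = struct.unpack('>I', payload[:4])[0]
#   arguments = payload[4:].split('\x00')[:number_of_arguments]
#   assert arguments == ['ABC', 'BB']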
+ BSM_TOKEN_EXEC_ARGUMENTS = construct.UBInt32('number_arguments') + + BSM_TOKEN_EXEC_ARGUMENT = construct.Struct( + 'bsm_token_exec_argument', + construct.RepeatUntil( + _BsmTokenIsEndOfString, + construct.StaticField("text", 1))) + + # au_to_in_addr // AUT_IN_ADDR: + BSM_TOKEN_ADDR = IPV4_STRUCT + + # au_to_in_addr_ext // AUT_IN_ADDR_EX: + BSM_TOKEN_ADDR_EXT = construct.Struct( + 'bsm_token_addr_ext', + construct.UBInt32('net_type'), + IPV6_STRUCT) + + # au_to_ip // AUT_IP: + # TODO: parse this header in the correct way. + BSM_TOKEN_IP = construct.String('binary_ipv4_add', 20) + + # au_to_ipc // AUT_IPC: + BSM_TOKEN_IPC = construct.Struct( + 'bsm_token_ipc', + construct.UBInt8('object_type'), + construct.UBInt32('object_id')) + + # au_to_ipc_perm // au_to_ipc_perm + BSM_TOKEN_IPC_PERM = construct.Struct( + 'bsm_token_ipc_perm', + construct.UBInt32('user_id'), + construct.UBInt32('group_id'), + construct.UBInt32('creator_user_id'), + construct.UBInt32('creator_group_id'), + construct.UBInt32('access_mode'), + construct.UBInt32('slot_seq'), + construct.UBInt32('key')) + + # au_to_iport // AUT_IPORT: + BSM_TOKEN_PORT = construct.UBInt16('port_number') + + # au_to_file // AUT_OTHER_FILE32: + BSM_TOKEN_FILE = construct.Struct( + 'bsm_token_file', + construct.UBInt32('timestamp'), + construct.UBInt32('microsecond'), + construct.UBInt16('length'), + construct.Array( + _BsmTokenGetLength, + construct.UBInt8('text'))) + + # au_to_subject64 // AUT_SUBJECT64: + BSM_TOKEN_SUBJECT64 = construct.Struct( + 'bsm_token_subject64', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt64('terminal_port'), + IPV4_STRUCT) + + # au_to_subject64_ex // AU_IPv4: + BSM_TOKEN_SUBJECT64_EX = construct.Struct( + 'bsm_token_subject64_ex', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt32('terminal_port'), + construct.UBInt32('terminal_type'), + BSM_IP_TYPE_SHORT) + + # au_to_process32 // AUT_PROCESS32: + BSM_TOKEN_PROCESS32 = construct.Struct( + 'bsm_token_process32', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt32('terminal_port'), + IPV4_STRUCT) + + # au_to_process64 // AUT_PROCESS32: + BSM_TOKEN_PROCESS64 = construct.Struct( + 'bsm_token_process64', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt64('terminal_port'), + IPV4_STRUCT) + + # au_to_process32_ex // AUT_PROCESS32_EX: + BSM_TOKEN_PROCESS32_EX = construct.Struct( + 'bsm_token_process32_ex', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt32('terminal_port'), + BSM_IP_TYPE_SHORT) + + # au_to_process64_ex // AUT_PROCESS64_EX: + BSM_TOKEN_PROCESS64_EX = construct.Struct( + 'bsm_token_process64_ex', + BSM_TOKEN_SUBJECT_SHORT, + construct.UBInt64('terminal_port'), + BSM_IP_TYPE_SHORT) + + # au_to_sock_inet32 // AUT_SOCKINET32: + BSM_TOKEN_AUT_SOCKINET32 = construct.Struct( + 'bsm_token_aut_sockinet32', + construct.UBInt16('net_type'), + construct.UBInt16('port_number'), + IPV4_STRUCT) + + # Info: checked against the source code of XNU, but not against + # real BSM file. 
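# Editor's note, not part of the original change: the 128-bit address in
# IPV6_STRUCT is split into two big-endian 64-bit halves. A standalone
# sketch for the loopback address ::1 (high=0, low=1):
#
#   import socket
#   import struct
#
#   packed = struct.pack('>QQ', 0, 1)
#   if hasattr(socket, 'inet_ntop'):
#     print socket.inet_ntop(socket.AF_INET6, packed)  # prints ::1
#
# See _IPv6Format() below for the parser's fallback when inet_ntop is
# unavailable (for example on Windows).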
+ BSM_TOKEN_AUT_SOCKINET128 = construct.Struct( + 'bsm_token_aut_sockinet128', + construct.UBInt16('net_type'), + construct.UBInt16('port_number'), + IPV6_STRUCT) + + INET6_ADDR_TYPE = construct.Struct( + 'addr_type', + construct.UBInt16('ip_type'), + construct.UBInt16('source_port'), + construct.UBInt64('saddr_high'), + construct.UBInt64('saddr_low'), + construct.UBInt16('destination_port'), + construct.UBInt64('daddr_high'), + construct.UBInt64('daddr_low')) + + INET4_ADDR_TYPE = construct.Struct( + 'addr_type', + construct.UBInt16('ip_type'), + construct.UBInt16('source_port'), + construct.UBInt32('source_address'), + construct.UBInt16('destination_port'), + construct.UBInt32('destination_address')) + + # au_to_socket_ex // AUT_SOCKET_EX + # TODO: Change the 26 for unixbsm.BSM_PROTOCOLS.INET6. + BSM_TOKEN_AUT_SOCKINET32_EX = construct.Struct( + 'bsm_token_aut_sockinet32_ex', + construct.UBInt16('socket_domain'), + construct.UBInt16('socket_type'), + construct.Switch( + 'structure_addr_port', + _BsmTokenGetSocketDomain, + {26: INET6_ADDR_TYPE}, + default=INET4_ADDR_TYPE)) + + # au_to_sock_unix // AUT_SOCKUNIX + BSM_TOKEN_SOCKET_UNIX = construct.Struct( + 'bsm_token_au_to_sock_unix', + construct.UBInt16('family'), + construct.RepeatUntil( + _BsmTokenIsEndOfString, + construct.StaticField("path", 1))) + + # au_to_data // au_to_data + # how to print: bsmtoken.BSM_TOKEN_DATA_PRINT. + # type: bsmtoken.BSM_TOKEN_DATA_TYPE. + # unit_count: number of type values. + # BSM_TOKEN_DATA has a end field = type * unit_count + BSM_TOKEN_DATA = construct.Struct( + 'bsm_token_data', + construct.UBInt8('how_to_print'), + construct.UBInt8('data_type'), + construct.UBInt8('unit_count')) + + # au_to_attr32 // AUT_ATTR32 + BSM_TOKEN_ATTR32 = construct.Struct( + 'bsm_token_attr32', + construct.UBInt32('file_mode'), + construct.UBInt32('uid'), + construct.UBInt32('gid'), + construct.UBInt32('file_system_id'), + construct.UBInt64('file_system_node_id'), + construct.UBInt32('device')) + + # au_to_attr64 // AUT_ATTR64 + BSM_TOKEN_ATTR64 = construct.Struct( + 'bsm_token_attr64', + construct.UBInt32('file_mode'), + construct.UBInt32('uid'), + construct.UBInt32('gid'), + construct.UBInt32('file_system_id'), + construct.UBInt64('file_system_node_id'), + construct.UBInt64('device')) + + # au_to_exit // AUT_EXIT + BSM_TOKEN_EXIT = construct.Struct( + 'bsm_token_exit', + construct.UBInt32('status'), + construct.UBInt32('return_value')) + + # au_to_newgroups // AUT_NEWGROUPS + # INFO: we must read BSM_TOKEN_DATA_INTEGER for each group. + BSM_TOKEN_GROUPS = construct.UBInt16('group_number') + + # au_to_exec_env == au_to_exec_args + BSM_TOKEN_EXEC_ENV = BSM_TOKEN_EXEC_ARGUMENTS + + # au_to_zonename //AUT_ZONENAME + BSM_TOKEN_ZONENAME = BSM_TOKEN_TEXT + + # Token ID. + # List of valid Token_ID. + # Token_ID -> [NAME_STRUCTURE, STRUCTURE] + # Only the checked structures are been added to the valid structures lists. 
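# Editor's note: a minimal standalone sketch, not part of the original
# change, of the dispatch pattern the tables below implement: read a
# one-byte token ID, look up its structure, then parse that structure.
# The sample bytes are fabricated (token ID 44 followed by port 80):
#
#   import construct
#
#   token_type = construct.UBInt8('token_id')
#   token_table = {44: ['BSM_TOKEN_PORT', construct.UBInt16('port_number')]}
#
#   stream_data = '\x2c\x00\x50'
#   token_id = token_type.parse(stream_data[:1])
#   name, structure = token_table[token_id]
#   assert (name, structure.parse(stream_data[1:])) == ('BSM_TOKEN_PORT', 80)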
+ BSM_TYPE_LIST = { + 17: ['BSM_TOKEN_FILE', BSM_TOKEN_FILE], + 19: ['BSM_TOKEN_TRAILER', BSM_TOKEN_TRAILER], + 20: ['BSM_HEADER32', BSM_HEADER32], + 21: ['BSM_HEADER64', BSM_HEADER64], + 33: ['BSM_TOKEN_DATA', BSM_TOKEN_DATA], + 34: ['BSM_TOKEN_IPC', BSM_TOKEN_IPC], + 35: ['BSM_TOKEN_PATH', BSM_TOKEN_PATH], + 36: ['BSM_TOKEN_SUBJECT32', BSM_TOKEN_SUBJECT32], + 38: ['BSM_TOKEN_PROCESS32', BSM_TOKEN_PROCESS32], + 39: ['BSM_TOKEN_RETURN32', BSM_TOKEN_RETURN32], + 40: ['BSM_TOKEN_TEXT', BSM_TOKEN_TEXT], + 41: ['BSM_TOKEN_OPAQUE', BSM_TOKEN_OPAQUE], + 42: ['BSM_TOKEN_ADDR', BSM_TOKEN_ADDR], + 43: ['BSM_TOKEN_IP', BSM_TOKEN_IP], + 44: ['BSM_TOKEN_PORT', BSM_TOKEN_PORT], + 45: ['BSM_TOKEN_ARGUMENT32', BSM_TOKEN_ARGUMENT32], + 47: ['BSM_TOKEN_SEQUENCE', BSM_TOKEN_SEQUENCE], + 96: ['BSM_TOKEN_ZONENAME', BSM_TOKEN_ZONENAME], + 113: ['BSM_TOKEN_ARGUMENT64', BSM_TOKEN_ARGUMENT64], + 114: ['BSM_TOKEN_RETURN64', BSM_TOKEN_RETURN64], + 116: ['BSM_HEADER32_EX', BSM_HEADER32_EX], + 119: ['BSM_TOKEN_PROCESS64', BSM_TOKEN_PROCESS64], + 122: ['BSM_TOKEN_SUBJECT32_EX', BSM_TOKEN_SUBJECT32_EX], + 127: ['BSM_TOKEN_AUT_SOCKINET32_EX', BSM_TOKEN_AUT_SOCKINET32_EX], + 128: ['BSM_TOKEN_AUT_SOCKINET32', BSM_TOKEN_AUT_SOCKINET32]} + + # Untested structures. + # When not tested structure is found, we try to parse using also + # these structures. + BSM_TYPE_LIST_NOT_TESTED = { + 49: ['BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32], + 50: ['BSM_TOKEN_IPC_PERM', BSM_TOKEN_IPC_PERM], + 52: ['BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS], + 59: ['BSM_TOKEN_GROUPS', BSM_TOKEN_GROUPS], + 60: ['BSM_TOKEN_EXEC_ARGUMENTS', BSM_TOKEN_EXEC_ARGUMENTS], + 61: ['BSM_TOKEN_EXEC_ENV', BSM_TOKEN_EXEC_ENV], + 62: ['BSM_TOKEN_ATTR32', BSM_TOKEN_ATTR32], + 82: ['BSM_TOKEN_EXIT', BSM_TOKEN_EXIT], + 115: ['BSM_TOKEN_ATTR64', BSM_TOKEN_ATTR64], + 117: ['BSM_TOKEN_SUBJECT64', BSM_TOKEN_SUBJECT64], + 123: ['BSM_TOKEN_PROCESS32_EX', BSM_TOKEN_PROCESS32_EX], + 124: ['BSM_TOKEN_PROCESS64_EX', BSM_TOKEN_PROCESS64_EX], + 125: ['BSM_TOKEN_SUBJECT64_EX', BSM_TOKEN_SUBJECT64_EX], + 126: ['BSM_TOKEN_ADDR_EXT', BSM_TOKEN_ADDR_EXT], + 129: ['BSM_TOKEN_AUT_SOCKINET128', BSM_TOKEN_AUT_SOCKINET128], + 130: ['BSM_TOKEN_SOCKET_UNIX', BSM_TOKEN_SOCKET_UNIX]} + + def __init__(self): + """Initializes a parser object.""" + super(BsmParser, self).__init__() + # Create the dictionary with all token IDs: tested and untested. + self.bsm_type_list_all = self.BSM_TYPE_LIST.copy() + self.bsm_type_list_all.update(self.BSM_TYPE_LIST_NOT_TESTED) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract entries from a BSM file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + file_object.seek(0, os.SEEK_SET) + + try: + is_bsm = self.VerifyFile(parser_context, file_object) + except (IOError, construct.FieldError) as exception: + file_object.close() + raise errors.UnableToParseFile( + u'Unable to parse BSM file with error: {0:s}'.format(exception)) + + if not is_bsm: + file_object.close() + raise errors.UnableToParseFile( + u'Not a BSM File, unable to parse.') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. 
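# Editor's note, not part of the original change: _BuildParserChain()
# appends this parser's NAME to the incoming chain so that events produced
# below carry a provenance string. A hypothetical standalone equivalent,
# assuming plaso's convention of joining parser names with '/':
#
#   def build_parser_chain(parser_chain, name=u'bsm_log'):
#     if not parser_chain:
#       return name
#     return u'/'.join([parser_chain, name])
#
#   assert build_parser_chain(None) == u'bsm_log'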
+ parser_chain = self._BuildParserChain(parser_chain) + + event_object = self.ReadBSMEvent(parser_context, file_object) + while event_object: + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + event_object = self.ReadBSMEvent(parser_context, file_object) + + file_object.close() + + def ReadBSMEvent(self, parser_context, file_object): + """Returns a BsmEvent from a single BSM entry. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_object: A file-like object. + + Returns: + An event object. + """ + # A list of tokens that has the entry. + extra_tokens = [] + + offset = file_object.tell() + + # Token header, first token for each entry. + try: + token_id = self.BSM_TYPE.parse_stream(file_object) + except (IOError, construct.FieldError): + return + + bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, ['', '']) + if bsm_type == 'BSM_HEADER32': + token = structure.parse_stream(file_object) + elif bsm_type == 'BSM_HEADER64': + token = structure.parse_stream(file_object) + elif bsm_type == 'BSM_HEADER32_EX': + token = structure.parse_stream(file_object) + else: + logging.warning( + u'Token ID Header {0} not expected at position 0x{1:X}.' + u'The parsing of the file cannot be continued'.format( + token_id, file_object.tell())) + # TODO: if it is a Mac OS X, search for the trailer magic value + # as a end of the entry can be a possibility to continue. + return + + length = token.bsm_header.length + event_type = u'{0} ({1})'.format( + bsmtoken.BSM_AUDIT_EVENT.get(token.bsm_header.event_type, 'UNKNOWN'), + token.bsm_header.event_type) + timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond( + token.timestamp, token.microsecond) + + # Read until we reach the end of the record. + while file_object.tell() < (offset + length): + # Check if it is a known token. + try: + token_id = self.BSM_TYPE.parse_stream(file_object) + except (IOError, construct.FieldError): + logging.warning( + u'Unable to parse the Token ID at position: {0:d}'.format( + file_object.tell())) + return + if not token_id in self.BSM_TYPE_LIST: + pending = (offset + length) - file_object.tell() + extra_tokens.extend(self.TryWithUntestedStructures( + file_object, token_id, pending)) + else: + token = self.BSM_TYPE_LIST[token_id][1].parse_stream(file_object) + extra_tokens.append(self.FormatToken(token_id, token, file_object)) + + if file_object.tell() > (offset + length): + logging.warning( + u'Token ID {0} not expected at position 0x{1:X}.' + u'Jumping for the next entry.'.format( + token_id, file_object.tell())) + try: + file_object.seek( + (offset + length) - file_object.tell(), os.SEEK_CUR) + except (IOError, construct.FieldError) as exception: + logging.warning( + u'Unable to jump to next entry with error: {0:s}'.format(exception)) + return + + # BSM can be in more than one OS: BSD, Solaris and Mac OS X. + if parser_context.platform == 'MacOSX': + # In Mac OS X the last two tokens are the return status and the trailer. 
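# Editor's note, not part of the original change: a fabricated example of
# the token list shape handled below. The return-value and trailer strings
# sit at the tail and are popped off into separate event attributes:
#
#   extra_tokens = [
#       u'[BSM_TOKEN_TEXT: launchctl::Audit startup]',
#       u'[BSM_TOKEN_RETURN32: Success (0), System call status: 0]',
#       u'[BSM_TOKEN_TRAILER: 97]']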
+ if len(extra_tokens) >= 2: + return_value = extra_tokens[-2:-1][0] + if (return_value.startswith('[BSM_TOKEN_RETURN32') or + return_value.startswith('[BSM_TOKEN_RETURN64')): + _ = extra_tokens.pop(len(extra_tokens)-2) + else: + return_value = 'Return unknown' + else: + return_value = 'Return unknown' + if extra_tokens: + trailer = extra_tokens[-1] + if trailer.startswith('[BSM_TOKEN_TRAILER'): + _ = extra_tokens.pop(len(extra_tokens)-1) + else: + trailer = 'Trailer unknown' + else: + trailer = 'Trailer unknown' + return MacBsmEvent( + event_type, timestamp, u'. '.join(extra_tokens), + return_value, trailer, offset) + else: + # Generic BSM format. + if extra_tokens: + trailer = extra_tokens[-1] + if trailer.startswith('[BSM_TOKEN_TRAILER'): + _ = extra_tokens.pop(len(extra_tokens)-1) + else: + trailer = 'Trailer unknown' + else: + trailer = 'Trailer unknown' + return BsmEvent( + event_type, timestamp, u'. '.join(extra_tokens), trailer, offset) + + def VerifyFile(self, parser_context, file_object): + """Check if the file is a BSM file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_event: file that we want to check. + + Returns: + True if this is a valid BSM file, otherwise False. + """ + if file_object.tell() != 0: + file_object.seek(0) + + # First part of the entry is always a Header. + try: + token_id = self.BSM_TYPE.parse_stream(file_object) + except (IOError, construct.FieldError): + return False + if token_id not in self.BSM_TYPE_LIST: + return False + + bsm_type, structure = self.BSM_TYPE_LIST.get(token_id, ['', '']) + try: + if bsm_type == 'BSM_HEADER32': + header = structure.parse_stream(file_object) + elif bsm_type == 'BSM_HEADER64': + header = structure.parse_stream(file_object) + elif bsm_type == 'BSM_HEADER32_EX': + header = structure.parse_stream(file_object) + else: + return False + except (IOError, construct.FieldError): + return False + if header.bsm_header.version != self.AUDIT_HEADER_VERSION: + return False + + try: + token_id = self.BSM_TYPE.parse_stream(file_object) + except (IOError, construct.FieldError): + return False + + # If is Mac OS X BSM file, next entry is a text token indicating + # if it is a normal start or it is a recovery track. + if parser_context.platform == 'MacOSX': + bsm_type_list = self.BSM_TYPE_LIST.get(token_id) + if not bsm_type_list: + return False + + if bsm_type_list[0] != 'BSM_TOKEN_TEXT': + logging.warning(u'It is not a valid first entry for Mac OS X BSM.') + return False + try: + token = self.BSM_TOKEN_TEXT.parse_stream(file_object) + except (IOError, construct.FieldError): + return + + text = self._CopyUtf8ByteArrayToString(token.text) + if (text != 'launchctl::Audit startup' and + text != 'launchctl::Audit recovery'): + logging.warning(u'It is not a valid first entry for Mac OS X BSM.') + return False + + file_object.seek(0) + return True + + def TryWithUntestedStructures(self, file_object, token_id, pending): + """Try to parse the pending part of the entry using untested structures. + + Args: + file_object: BSM file. + token_id: integer with the id that comes from the unknown token. + pending: pending length of the entry. + + Returns: + A list of extra tokens data that can be parsed using non-tested + structures. A message indicating that a structure cannot be parsed + is added for unparsed structures. + """ + # Data from the unknown structure. + start_position = file_object.tell() + start_token_id = token_id + extra_tokens = [] + + # Read all the "pending" bytes. 
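# Editor's note, not part of the original change: `pending` is simply the
# byte count left in the current record. For a record that starts at
# offset 100 with length 64, the caller computes:
#
#   pending = (100 + 64) - file_object.tell()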
+ try:
+ if token_id in self.bsm_type_list_all:
+ token = self.bsm_type_list_all[token_id][1].parse_stream(file_object)
+ extra_tokens.append(self.FormatToken(token_id, token, file_object))
+ while file_object.tell() < (start_position + pending):
+ # Check if it is a known token.
+ try:
+ token_id = self.BSM_TYPE.parse_stream(file_object)
+ except (IOError, construct.FieldError):
+ logging.warning(
+ u'Unable to parse the Token ID at position: {0:d}'.format(
+ file_object.tell()))
+ return
+ if token_id not in self.bsm_type_list_all:
+ break
+ token = self.bsm_type_list_all[token_id][1].parse_stream(
+ file_object)
+ extra_tokens.append(self.FormatToken(token_id, token, file_object))
+ except (IOError, construct.FieldError):
+ token_id = 255
+
+ next_entry = (start_position + pending)
+ if file_object.tell() != next_entry:
+ # Unknown structure.
+ logging.warning(u'Unknown Token at "0x{0:X}", ID: {1} (0x{2:X})'.format(
+ start_position - 1, token_id, token_id))
+ # TODO: another way to save this information must be found.
+ extra_tokens.append(
+ u'Plaso: some tokens from this entry can '
+ u'not be saved. Entry at 0x{0:X} with unknown '
+ u'token id "0x{1:X}".'.format(
+ start_position - 1, start_token_id))
+ # Move to the next entry.
+ file_object.seek(next_entry - file_object.tell(), os.SEEK_CUR)
+ # Return an empty list because it is not known which of the parsed
+ # structures was the incorrect one that prevented reaching the expected
+ # end of the entry.
+ return []
+ return extra_tokens
+
+ # TODO: instead of comparing the name of the structure to know which one
+ # was parsed, compare the numeric token_id directly; less readable, but
+ # better performance.
+ def FormatToken(self, token_id, token, file_object):
+ """Parses the token depending on the type of its structure.
+
+ Args:
+ token_id: Identification integer of the token type.
+ token: Token struct to parse.
+ file_object: BSM file.
+
+ Returns:
+ String with the parsed token values.
+ """ + if token_id not in self.bsm_type_list_all: + return u'Type Unknown: {0:d} (0x{0:X})'.format(token_id) + + bsm_type, _ = self.bsm_type_list_all.get(token_id, ['', '']) + + if bsm_type in [ + 'BSM_TOKEN_TEXT', 'BSM_TOKEN_PATH', 'BSM_TOKEN_ZONENAME']: + try: + string = self._CopyUtf8ByteArrayToString(token.text) + except TypeError: + string = u'Unknown' + return u'[{0}: {1:s}]'.format(bsm_type, string) + + elif bsm_type in [ + 'BSM_TOKEN_RETURN32', 'BSM_TOKEN_RETURN64', 'BSM_TOKEN_EXIT']: + return u'[{0}: {1} ({2}), System call status: {3}]'.format( + bsm_type, bsmtoken.BSM_ERRORS.get(token.status, 'Unknown'), + token.status, token.return_value) + + elif bsm_type in ['BSM_TOKEN_SUBJECT32', 'BSM_TOKEN_SUBJECT64']: + return ( + u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), ' + u'pid({6}), session_id({7}), terminal_port({8}), ' + u'terminal_ip({9})]').format( + bsm_type, token.subject_data.audit_uid, + token.subject_data.effective_uid, + token.subject_data.effective_gid, + token.subject_data.real_uid, token.subject_data.real_gid, + token.subject_data.pid, token.subject_data.session_id, + token.terminal_port, self._IPv4Format(token.ipv4)) + + elif bsm_type in ['BSM_TOKEN_SUBJECT32_EX', 'BSM_TOKEN_SUBJECT64_EX']: + if token.bsm_ip_type_short.net_type == self.AU_IPv6: + ip = self._IPv6Format( + token.bsm_ip_type_short.ip_addr.high, + token.bsm_ip_type_short.ip_addr.low) + elif token.bsm_ip_type_short.net_type == self.AU_IPv4: + ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr) + else: + ip = 'unknown' + return ( + u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), ' + u'pid({6}), session_id({7}), terminal_port({8}), ' + u'terminal_ip({9})]').format( + bsm_type, token.subject_data.audit_uid, + token.subject_data.effective_uid, + token.subject_data.effective_gid, + token.subject_data.real_uid, token.subject_data.real_gid, + token.subject_data.pid, token.subject_data.session_id, + token.terminal_port, ip) + + elif bsm_type in ['BSM_TOKEN_ARGUMENT32', 'BSM_TOKEN_ARGUMENT64']: + string = self._CopyUtf8ByteArrayToString(token.text) + return u'[{0}: {1:s}({2}) is 0x{3:X}]'.format( + bsm_type, string, token.num_arg, token.name_arg) + + elif bsm_type in ['BSM_TOKEN_EXEC_ARGUMENTS', 'BSM_TOKEN_EXEC_ENV']: + arguments = [] + for _ in range(0, token): + sub_token = self.BSM_TOKEN_EXEC_ARGUMENT.parse_stream(file_object) + string = self._CopyUtf8ByteArrayToString(sub_token.text) + arguments.append(string) + return u'[{0}: {1:s}]'.format(bsm_type, u' '.join(arguments)) + + elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32': + return (u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format( + bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), + token.net_type, token.port_number, self._IPv4Format(token.ipv4))) + + elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET128': + return u'[{0}: {1} ({2}) open in port {3}. Address {4}]'.format( + bsm_type, bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'), + token.net_type, token.port_number, + self._IPv6Format(token.ipv6.high, token.ipv6.low)) + + elif bsm_type == 'BSM_TOKEN_ADDR': + return u'[{0}: {1}]'.format(bsm_type, self._IPv4Format(token)) + + elif bsm_type == 'BSM_TOKEN_IP': + return u'[IPv4_Header: 0x{0:s}]'.format(token.encode('hex')) + + elif bsm_type == 'BSM_TOKEN_ADDR_EXT': + return u'[{0}: {1} ({2}). 
Address {3}]'.format(
+ bsm_type,
+ bsmtoken.BSM_PROTOCOLS.get(token.net_type, 'UNKNOWN'),
+ token.net_type, self._IPv6Format(token.ipv6.high, token.ipv6.low))
+
+ elif bsm_type == 'BSM_TOKEN_PORT':
+ return u'[{0}: {1}]'.format(bsm_type, token)
+
+ elif bsm_type == 'BSM_TOKEN_TRAILER':
+ return u'[{0}: {1}]'.format(bsm_type, token.record_length)
+
+ elif bsm_type == 'BSM_TOKEN_FILE':
+ # TODO: if this timestamp is useful, it must be extracted as a separate
+ # event object.
+ timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
+ token.timestamp, token.microsecond)
+ date_time = timelib.Timestamp.CopyToDatetime(timestamp, pytz.utc)
+ date_time_string = date_time.strftime('%Y-%m-%d %H:%M:%S')
+
+ string = self._CopyUtf8ByteArrayToString(token.text)
+ return u'[{0}: {1:s}, timestamp: {2:s}]'.format(
+ bsm_type, string, date_time_string)
+
+ elif bsm_type == 'BSM_TOKEN_IPC':
+ return u'[{0}: object type {1}, object id {2}]'.format(
+ bsm_type, token.object_type, token.object_id)
+
+ elif bsm_type in ['BSM_TOKEN_PROCESS32', 'BSM_TOKEN_PROCESS64']:
+ return (
+ u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
+ u'pid({6}), session_id({7}), terminal_port({8}), '
+ u'terminal_ip({9})]').format(
+ bsm_type, token.subject_data.audit_uid,
+ token.subject_data.effective_uid,
+ token.subject_data.effective_gid,
+ token.subject_data.real_uid, token.subject_data.real_gid,
+ token.subject_data.pid, token.subject_data.session_id,
+ token.terminal_port, self._IPv4Format(token.ipv4))
+
+ elif bsm_type in ['BSM_TOKEN_PROCESS32_EX', 'BSM_TOKEN_PROCESS64_EX']:
+ if token.bsm_ip_type_short.net_type == self.AU_IPv6:
+ ip = self._IPv6Format(
+ token.bsm_ip_type_short.ip_addr.high,
+ token.bsm_ip_type_short.ip_addr.low)
+ elif token.bsm_ip_type_short.net_type == self.AU_IPv4:
+ ip = self._IPv4Format(token.bsm_ip_type_short.ip_addr)
+ else:
+ ip = 'unknown'
+ return (
+ u'[{0}: aid({1}), euid({2}), egid({3}), uid({4}), gid({5}), '
+ u'pid({6}), session_id({7}), terminal_port({8}), '
+ u'terminal_ip({9})]').format(
+ bsm_type, token.subject_data.audit_uid,
+ token.subject_data.effective_uid,
+ token.subject_data.effective_gid,
+ token.subject_data.real_uid, token.subject_data.real_gid,
+ token.subject_data.pid, token.subject_data.session_id,
+ token.terminal_port, ip)
+
+ elif bsm_type == 'BSM_TOKEN_DATA':
+ data = []
+ data_type = bsmtoken.BSM_TOKEN_DATA_TYPE.get(token.data_type, '')
+ if data_type == 'AUR_CHAR':
+ for _ in range(token.unit_count):
+ data.append(self.BSM_TOKEN_DATA_CHAR.parse_stream(file_object))
+ elif data_type == 'AUR_SHORT':
+ for _ in range(token.unit_count):
+ data.append(self.BSM_TOKEN_DATA_SHORT.parse_stream(file_object))
+ elif data_type == 'AUR_INT32':
+ for _ in range(token.unit_count):
+ data.append(self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))
+ else:
+ data.append(u'Unknown type data')
+ # TODO: when the data is a string it ends with "."; however a trailing
+ # space is returned after the UTF-8 conversion.
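# Editor's note, not part of the original change: for a fabricated
# BSM_TOKEN_DATA header with how_to_print 'String', data_type 'AUR_CHAR'
# and unit_count 8, the branch above reads eight one-byte values that are
# joined into an eight-character string for the message returned below.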
+ return u'[{0}: Format data: {1}, Data: {2}]'.format( + bsm_type, bsmtoken.BSM_TOKEN_DATA_PRINT[token.how_to_print], + self._RawToUTF8(u''.join(data))) + + elif bsm_type in ['BSM_TOKEN_ATTR32', 'BSM_TOKEN_ATTR64']: + return ( + u'[{0}: Mode: {1}, UID: {2}, GID: {3}, ' + u'File system ID: {4}, Node ID: {5}, Device: {6}]').format( + bsm_type, token.file_mode, token.uid, token.gid, + token.file_system_id, token.file_system_node_id, token.device) + + elif bsm_type == 'BSM_TOKEN_GROUPS': + arguments = [] + for _ in range(token): + arguments.append(self._RawToUTF8( + self.BSM_TOKEN_DATA_INTEGER.parse_stream(file_object))) + return u'[{0}: {1:s}]'.format(bsm_type, u','.join(arguments)) + + elif bsm_type == 'BSM_TOKEN_AUT_SOCKINET32_EX': + if bsmtoken.BSM_PROTOCOLS.get(token.socket_domain, '') == 'INET6': + saddr = self._IPv6Format( + token.structure_addr_port.saddr_high, + token.structure_addr_port.saddr_low) + daddr = self._IPv6Format( + token.structure_addr_port.daddr_high, + token.structure_addr_port.daddr_low) + else: + saddr = self._IPv4Format(token.structure_addr_port.source_address) + daddr = self._IPv4Format(token.structure_addr_port.destination_address) + + return u'[{0}: from {1} port {2} to {3} port {4}]'.format( + bsm_type, saddr, token.structure_addr_port.source_port, + daddr, token.structure_addr_port.destination_port) + + elif bsm_type == 'BSM_TOKEN_IPC_PERM': + return ( + u'[{0}: user id {1}, group id {2}, create user id {3}, ' + u'create group id {4}, access {5}]').format( + bsm_type, token.user_id, token.group_id, + token.creator_user_id, token.creator_group_id, token.access_mode) + + elif bsm_type == 'BSM_TOKEN_SOCKET_UNIX': + string = self._CopyUtf8ByteArrayToString(token.path) + return u'[{0}: Family {1}, Path {2:s}]'.format( + bsm_type, token.family, string) + + elif bsm_type == 'BSM_TOKEN_OPAQUE': + string = self._CopyByteArrayToBase16String(token.text) + return u'[{0}: {1:s}]'.format(bsm_type, string) + + elif bsm_type == 'BSM_TOKEN_SEQUENCE': + return u'[{0}: {1}]'.format(bsm_type, token) + + def _IPv6Format(self, high, low): + """Provide a readable IPv6 IP having the high and low part in 2 integers. + + Args: + high: 64 bits integers number with the high part of the IPv6. + low: 64 bits integers number with the low part of the IPv6. + + Returns: + String with a well represented IPv6. + """ + ipv6_string = self.IPV6_STRUCT.build( + construct.Container(high=high, low=low)) + # socket.inet_ntop not supported in Windows. + if hasattr(socket, 'inet_ntop'): + return socket.inet_ntop(socket.AF_INET6, ipv6_string) + else: + # TODO: this approach returns double "::", illegal IPv6 addr. + str_address = binascii.hexlify(ipv6_string) + address = [] + blank = False + for pos in range(0, len(str_address), 4): + if str_address[pos:pos + 4] == '0000': + if not blank: + address.append('') + blank = True + else: + blank = False + address.append(str_address[pos:pos + 4].lstrip('0')) + return u':'.join(address) + + def _IPv4Format(self, address): + """Change an integer IPv4 address value for its 4 octets representation. + + Args: + address: integer with the IPv4 address. + + Returns: + IPv4 address in 4 octect representation (class A, B, C, D). + """ + ipv4_string = self.IPV4_STRUCT.build(address) + return socket.inet_ntoa(ipv4_string) + + def _RawToUTF8(self, byte_stream): + """Copies a UTF-8 byte stream into a Unicode string. + + Args: + byte_stream: A byte stream containing an UTF-8 encoded string. + + Returns: + A Unicode string. 
+ """ + try: + string = byte_stream.decode('utf-8') + except UnicodeDecodeError: + logging.warning( + u'Decode UTF8 failed, the message string may be cut short.') + string = byte_stream.decode('utf-8', errors='ignore') + return string.partition('\x00')[0] + + def _CopyByteArrayToBase16String(self, byte_array): + """Copies a byte array into a base-16 encoded Unicode string. + + Args: + byte_array: A byte array. + + Returns: + A base-16 encoded Unicode string. + """ + return u''.join(['{0:02x}'.format(byte) for byte in byte_array]) + + def _CopyUtf8ByteArrayToString(self, byte_array): + """Copies a UTF-8 encoded byte array into a Unicode string. + + Args: + byte_array: A byte array containing an UTF-8 encoded string. + + Returns: + A Unicode string. + """ + byte_stream = ''.join(map(chr, byte_array)) + + try: + string = byte_stream.decode('utf-8') + except UnicodeDecodeError: + logging.warning(u'Unable to decode UTF-8 formatted byte array.') + string = byte_stream.decode('utf-8', errors='ignore') + + string, _, _ = string.partition('\x00') + return string + + +manager.ParsersManager.RegisterParser(BsmParser) diff --git a/plaso/parsers/bsm_test.py b/plaso/parsers/bsm_test.py new file mode 100644 index 0000000..78faf9d --- /dev/null +++ b/plaso/parsers/bsm_test.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Basic Security Module (BSM) file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import bsm as bsm_formatter +from plaso.lib import timelib_test +from plaso.parsers import bsm +from plaso.parsers import test_lib + + +class MacOSXBsmParserTest(test_lib.ParserTestCase): + """Tests for Basic Security Module (BSM) file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = bsm.BsmParser() + + def testParse(self): + """Tests the Parse function on a Mac OS X BSM file.""" + knowledge_base_values = {'guessed_os': 'MacOSX'} + test_file = self._GetTestFilePath(['apple.bsm']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 54) + + event_object = event_objects[0] + + self.assertEqual(event_object.data_type, 'mac:bsm:event') + + expected_msg = ( + u'Type: audit crash recovery (45029) ' + u'Return: [BSM_TOKEN_RETURN32: Success (0), System call status: 0] ' + u'Information: [BSM_TOKEN_TEXT: launchctl::Audit recovery]. 
' + u'[BSM_TOKEN_PATH: /var/audit/20131104171720.crash_recovery]') + + expected_msg_short = ( + u'Type: audit crash recovery (45029) ' + u'Return: [BSM_TOKEN_RETURN32: Success (0), ...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-04 18:36:20.000381') + self.assertEqual(event_object.timestamp, expected_timestamp) + self.assertEqual(event_object.event_type, u'audit crash recovery (45029)') + + expected_extra_tokens = ( + u'[BSM_TOKEN_TEXT: launchctl::Audit recovery]. ' + u'[BSM_TOKEN_PATH: /var/audit/20131104171720.crash_recovery]') + self.assertEqual(event_object.extra_tokens, expected_extra_tokens) + + expected_return_value = ( + u'[BSM_TOKEN_RETURN32: Success (0), System call status: 0]') + self.assertEqual(event_object.return_value, expected_return_value) + + event_object = event_objects[15] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-04 18:36:26.000171') + self.assertEqual(event_object.timestamp, expected_timestamp) + self.assertEqual(event_object.event_type, u'user authentication (45023)') + + expected_extra_tokens = ( + u'[BSM_TOKEN_SUBJECT32: aid(4294967295), euid(92), egid(92), uid(92), ' + u'gid(92), pid(143), session_id(100004), terminal_port(143), ' + u'terminal_ip(0.0.0.0)]. ' + u'[BSM_TOKEN_TEXT: Verify password for record type Users ' + u'\'moxilo\' node \'/Local/Default\']') + self.assertEqual(event_object.extra_tokens, expected_extra_tokens) + + expected_return_value = ( + u'[BSM_TOKEN_RETURN32: Unknown (255), System call status: 5000]') + self.assertEqual(event_object.return_value, expected_return_value) + + event_object = event_objects[31] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-04 18:36:26.000530') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.event_type, u'SecSrvr AuthEngine (45025)') + expected_extra_tokens = ( + u'[BSM_TOKEN_SUBJECT32: aid(4294967295), euid(0), egid(0), uid(0), ' + u'gid(0), pid(67), session_id(100004), terminal_port(67), ' + u'terminal_ip(0.0.0.0)]. ' + u'[BSM_TOKEN_TEXT: system.login.done]. ' + u'[BSM_TOKEN_TEXT: system.login.done]') + self.assertEqual(event_object.extra_tokens, expected_extra_tokens) + + expected_return_value = ( + u'[BSM_TOKEN_RETURN32: Success (0), System call status: 0]') + self.assertEqual(event_object.return_value, expected_return_value) + + event_object = event_objects[50] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-04 18:37:36.000399') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.event_type, u'session end (44903)') + + expected_extra_tokens = ( + u'[BSM_TOKEN_ARGUMENT64: sflags(1) is 0x0]. ' + u'[BSM_TOKEN_ARGUMENT32: am_success(2) is 0x3000]. ' + u'[BSM_TOKEN_ARGUMENT32: am_failure(3) is 0x3000]. 
' + u'[BSM_TOKEN_SUBJECT32: aid(4294967295), euid(0), egid(0), uid(0), ' + u'gid(0), pid(0), session_id(100015), terminal_port(0), ' + u'terminal_ip(0.0.0.0)]') + self.assertEqual(event_object.extra_tokens, expected_extra_tokens) + + expected_return_value = ( + u'[BSM_TOKEN_RETURN32: Success (0), System call status: 0]') + self.assertEqual(event_object.return_value, expected_return_value) + + +class OpenBsmParserTest(test_lib.ParserTestCase): + """Tests for Basic Security Module (BSM) file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = bsm.BsmParser() + + def testParse(self): + """Tests the Parse function on a "generic" BSM file.""" + knowledge_base_values = {'guessed_os': 'openbsm'} + test_file = self._GetTestFilePath(['openbsm.bsm']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 50) + + expected_extra_tokens = [ + u'[BSM_TOKEN_ARGUMENT32: test_arg32_token(3) is 0xABCDEF00]', + u'[BSM_TOKEN_DATA: Format data: String, Data: SomeData]', + u'[BSM_TOKEN_FILE: test, timestamp: 1970-01-01 20:42:45]', + u'[BSM_TOKEN_ADDR: 192.168.100.15]', + u'[IPv4_Header: 0x400000145478000040010000c0a8649bc0a86e30]', + u'[BSM_TOKEN_IPC: object type 1, object id 305419896]', + u'[BSM_TOKEN_PORT: 20480]', + u'[BSM_TOKEN_OPAQUE: aabbccdd]', + u'[BSM_TOKEN_PATH: /test/this/is/a/test]', + (u'[BSM_TOKEN_PROCESS32: aid(305419896), euid(19088743), ' + u'egid(591751049), uid(2557891634), gid(159868227), ' + u'pid(321140038), session_id(2542171492), ' + u'terminal_port(374945606), terminal_ip(127.0.0.1)]'), + (u'[BSM_TOKEN_PROCESS64: aid(305419896), euid(19088743), ' + u'egid(591751049), uid(2557891634), gid(159868227), ' + u'pid(321140038), session_id(2542171492), ' + u'terminal_port(374945606), terminal_ip(127.0.0.1)]'), + (u'[BSM_TOKEN_RETURN32: Invalid argument (22), ' + u'System call status: 305419896]'), + u'[BSM_TOKEN_SEQUENCE: 305419896]', + (u'[BSM_TOKEN_AUT_SOCKINET32_EX: ' + u'from 127.0.0.1 port 0 to 127.0.0.1 port 0]'), + (u'[BSM_TOKEN_SUBJECT32: aid(305419896), euid(19088743), ' + u'egid(591751049), uid(2557891634), gid(159868227), ' + u'pid(321140038), session_id(2542171492), ' + u'terminal_port(374945606), terminal_ip(127.0.0.1)]'), + (u'[BSM_TOKEN_SUBJECT32_EX: aid(305419896), euid(19088743), ' + u'egid(591751049), uid(2557891634), gid(159868227), ' + u'pid(321140038), session_id(2542171492), ' + u'terminal_port(374945606), terminal_ip(fe80::1)]'), + u'[BSM_TOKEN_TEXT: This is a test.]', + u'[BSM_TOKEN_ZONENAME: testzone]', + (u'[BSM_TOKEN_RETURN32: Argument list too long (7), ' + u'System call status: 4294967295]')] + + extra_tokens = [] + for event_object_index in range(0, 19): + extra_tokens.append(event_objects[event_object_index].extra_tokens) + + self.assertEqual(extra_tokens, expected_extra_tokens) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/chrome_cache.py b/plaso/parsers/chrome_cache.py new file mode 100644 index 0000000..7accdd1 --- /dev/null +++ b/plaso/parsers/chrome_cache.py @@ -0,0 +1,441 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Google Chrome and Chromium Cache files.""" + +import logging +import os + +import construct + +from dfvfs.resolver import resolver as path_spec_resolver +from dfvfs.path import factory as path_spec_factory + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager + + +class CacheAddress(object): + """Class that contains a cache address.""" + FILE_TYPE_SEPARATE = 0 + FILE_TYPE_BLOCK_RANKINGS = 1 + FILE_TYPE_BLOCK_256 = 2 + FILE_TYPE_BLOCK_1024 = 3 + FILE_TYPE_BLOCK_4096 = 4 + + _BLOCK_DATA_FILE_TYPES = [ + FILE_TYPE_BLOCK_RANKINGS, + FILE_TYPE_BLOCK_256, + FILE_TYPE_BLOCK_1024, + FILE_TYPE_BLOCK_4096] + + _FILE_TYPE_BLOCK_SIZES = [0, 36, 256, 1024, 4096] + + def __init__(self, cache_address): + """Initializes the cache address object. + + Args: + cache_address: the cache address value. + """ + super(CacheAddress, self).__init__() + self.block_number = None + self.block_offset = None + self.block_size = None + self.filename = None + self.value = cache_address + + if cache_address & 0x80000000: + self.is_initialized = u'True' + else: + self.is_initialized = u'False' + + self.file_type = (cache_address & 0x70000000) >> 28 + if not cache_address == 0x00000000: + if self.file_type == self.FILE_TYPE_SEPARATE: + file_selector = cache_address & 0x0fffffff + self.filename = u'f_{0:06x}'.format(file_selector) + + elif self.file_type in self._BLOCK_DATA_FILE_TYPES: + file_selector = (cache_address & 0x00ff0000) >> 16 + self.filename = u'data_{0:d}'.format(file_selector) + + file_block_size = self._FILE_TYPE_BLOCK_SIZES[self.file_type] + self.block_number = cache_address & 0x0000ffff + self.block_size = (cache_address & 0x03000000) >> 24 + self.block_size *= file_block_size + self.block_offset = 8192 + (self.block_number * file_block_size) + + +class CacheEntry(object): + """Class that contains a cache entry.""" + + def __init__(self): + """Initializes the cache entry object.""" + super(CacheEntry, self).__init__() + self.creation_time = None + self.hash = None + self.key = None + self.next = None + self.rankings_node = None + + +class IndexFile(object): + """Class that contains an index file.""" + + SIGNATURE = 0xc103cac3 + + _FILE_HEADER = construct.Struct( + 'chrome_cache_index_file_header', + construct.ULInt32('signature'), + construct.ULInt16('minor_version'), + construct.ULInt16('major_version'), + construct.ULInt32('number_of_entries'), + construct.ULInt32('stored_data_size'), + construct.ULInt32('last_created_file_number'), + construct.ULInt32('unknown1'), + construct.ULInt32('unknown2'), + construct.ULInt32('table_size'), + construct.ULInt32('unknown3'), + construct.ULInt32('unknown4'), + construct.ULInt64('creation_time'), + construct.Padding(208)) + + def __init__(self): + """Initializes the index file object.""" + super(IndexFile, self).__init__() + self._file_object = None + self.creation_time = None + self.version = None + self.index_table = [] + + def _ReadFileHeader(self): + """Reads the file header. 
+ + Raises: + IOError: if the file header cannot be read. + """ + self._file_object.seek(0, os.SEEK_SET) + + try: + file_header = self._FILE_HEADER.parse_stream(self._file_object) + except construct.FieldError as exception: + raise IOError(u'Unable to parse file header with error: {0:s}'.format( + exception)) + + signature = file_header.get('signature') + + if signature != self.SIGNATURE: + raise IOError(u'Unsupported index file signature') + + self.version = u'{0:d}.{1:d}'.format( + file_header.get('major_version'), + file_header.get('minor_version')) + + if self.version not in [u'2.0', u'2.1']: + raise IOError(u'Unsupported index file version: {0:s}'.format( + self.version)) + + self.creation_time = file_header.get('creation_time') + + def _ReadIndexTable(self): + """Reads the index table.""" + cache_address_data = self._file_object.read(4) + + while len(cache_address_data) == 4: + value = construct.ULInt32('cache_address').parse(cache_address_data) + + if value: + cache_address = CacheAddress(value) + self.index_table.append(cache_address) + + cache_address_data = self._file_object.read(4) + + def Close(self): + """Closes the index file.""" + if self._file_object: + self._file_object.close() + self._file_object = None + + def Open(self, file_object): + """Opens the index file. + + Args: + file_object: the file object. + """ + self._file_object = file_object + self._ReadFileHeader() + # Skip over the LRU data, which is 112 bytes in size. + self._file_object.seek(112, os.SEEK_CUR) + self._ReadIndexTable() + + +class DataBlockFile(object): + """Class that contains a data block file.""" + + SIGNATURE = 0xc104cac3 + + _FILE_HEADER = construct.Struct( + 'chrome_cache_data_file_header', + construct.ULInt32('signature'), + construct.ULInt16('minor_version'), + construct.ULInt16('major_version'), + construct.ULInt16('file_number'), + construct.ULInt16('next_file_number'), + construct.ULInt32('block_size'), + construct.ULInt32('number_of_entries'), + construct.ULInt32('maximum_number_of_entries'), + construct.Array(4, construct.ULInt32('emtpy')), + construct.Array(4, construct.ULInt32('hints')), + construct.ULInt32('updating'), + construct.Array(5, construct.ULInt32('user'))) + + _CACHE_ENTRY = construct.Struct( + 'chrome_cache_entry', + construct.ULInt32('hash'), + construct.ULInt32('next_address'), + construct.ULInt32('rankings_node_address'), + construct.ULInt32('reuse_count'), + construct.ULInt32('refetch_count'), + construct.ULInt32('state'), + construct.ULInt64('creation_time'), + construct.ULInt32('key_size'), + construct.ULInt32('long_key_address'), + construct.Array(4, construct.ULInt32('data_stream_sizes')), + construct.Array(4, construct.ULInt32('data_stream_addresses')), + construct.ULInt32('flags'), + construct.Padding(16), + construct.ULInt32('self_hash'), + construct.Array(160, construct.UBInt8('key'))) + + def __init__(self): + """Initializes the data block file object.""" + super(DataBlockFile, self).__init__() + self._file_object = None + self.creation_time = None + self.block_size = None + self.number_of_entries = None + self.version = None + + def _ReadFileHeader(self): + """Reads the file header. + + Raises: + IOError: if the file header cannot be read. 
+ """ + self._file_object.seek(0, os.SEEK_SET) + + try: + file_header = self._FILE_HEADER.parse_stream(self._file_object) + except construct.FieldError as exception: + raise IOError(u'Unable to parse file header with error: {0:s}'.format( + exception)) + + signature = file_header.get('signature') + + if signature != self.SIGNATURE: + raise IOError(u'Unsupported data block file signature') + + self.version = u'{0:d}.{1:d}'.format( + file_header.get('major_version'), + file_header.get('minor_version')) + + if self.version not in [u'2.0', u'2.1']: + raise IOError(u'Unsupported data block file version: {0:s}'.format( + self.version)) + + self.block_size = file_header.get('block_size') + self.number_of_entries = file_header.get('number_of_entries') + + def ReadCacheEntry(self, block_offset): + """Reads a cache entry.""" + self._file_object.seek(block_offset, os.SEEK_SET) + + try: + cache_entry_struct = self._CACHE_ENTRY.parse_stream(self._file_object) + except construct.FieldError as exception: + raise IOError(u'Unable to parse cache entry with error: {0:s}'.format( + exception)) + + cache_entry = CacheEntry() + + cache_entry.hash = cache_entry_struct.get('hash') + + cache_entry.next = CacheAddress(cache_entry_struct.get('next_address')) + cache_entry.rankings_node = CacheAddress(cache_entry_struct.get( + 'rankings_node_address')) + + cache_entry.creation_time = cache_entry_struct.get('creation_time') + + byte_array = cache_entry_struct.get('key') + string = u''.join(map(chr, byte_array)) + cache_entry.key, _, _ = string.partition(u'\x00') + + return cache_entry + + def Close(self): + """Closes the data block file.""" + if self._file_object: + self._file_object.close() + self._file_object = None + + def Open(self, file_object): + """Opens the data block file. + + Args: + file_object: the file object. + """ + self._file_object = file_object + self._ReadFileHeader() + + +class ChromeCacheEntryEvent(time_events.WebKitTimeEvent): + """Class that contains a Chrome Cache event.""" + + DATA_TYPE = 'chrome:cache:entry' + + def __init__(self, cache_entry): + """Initializes the event object. + + Args: + cache_entry: the cache entry (instance of CacheEntry). + """ + super(ChromeCacheEntryEvent, self).__init__( + cache_entry.creation_time, eventdata.EventTimestamp.CREATION_TIME) + self.original_url = cache_entry.key + + +class ChromeCacheParser(interface.BaseParser): + """Parses Chrome Cache files.""" + + NAME = 'chrome_cache' + DESCRIPTION = u'Parser for Chrome Cache files.' + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract event objects from Chrome Cache files. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + index_file = IndexFile() + try: + index_file.Open(file_object) + except IOError as exception: + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse index file {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + # Build a lookup table for the data block files. + file_system = file_entry.GetFileSystem() + path_segments = file_system.SplitPath(file_entry.path_spec.location) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. 
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    data_block_files = {}
+    for cache_address in index_file.index_table:
+      if cache_address.filename not in data_block_files:
+        # Remove the previous filename from the path segments list and
+        # add the filename of the data block file.
+        path_segments.pop()
+        path_segments.append(cache_address.filename)
+
+        # We need to pass only the used arguments to the path specification
+        # factory, otherwise it will raise an error.
+        kwargs = {}
+        if file_entry.path_spec.parent:
+          kwargs['parent'] = file_entry.path_spec.parent
+        kwargs['location'] = file_system.JoinPath(path_segments)
+
+        data_block_file_path_spec = path_spec_factory.Factory.NewPathSpec(
+            file_entry.path_spec.TYPE_INDICATOR, **kwargs)
+
+        try:
+          data_block_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
+              data_block_file_path_spec)
+        except RuntimeError as exception:
+          logging.error((
+              u'[{0:s}] Unable to open data block file: {1:s} while parsing '
+              u'{2:s} with error: {3:s}').format(
+                  parser_chain, kwargs['location'],
+                  file_entry.path_spec.comparable, exception))
+          data_block_file_entry = None
+
+        if not data_block_file_entry:
+          logging.error(u'Missing data block file: {0:s}'.format(
+              cache_address.filename))
+          data_block_file = None
+
+        else:
+          data_block_file_object = data_block_file_entry.GetFileObject()
+          data_block_file = DataBlockFile()
+
+          try:
+            data_block_file.Open(data_block_file_object)
+          except IOError as exception:
+            logging.error((
+                u'Unable to open data block file: {0:s} with error: '
+                u'{1:s}').format(cache_address.filename, exception))
+            data_block_file = None
+
+        data_block_files[cache_address.filename] = data_block_file
+
+    # Parse the cache entries in the data block files.
+    for cache_address in index_file.index_table:
+      cache_address_chain_length = 0
+      while cache_address.value != 0x00000000:
+        if cache_address_chain_length >= 64:
+          logging.error(u'Maximum allowed cache address chain length reached.')
+          break
+
+        data_file = data_block_files.get(cache_address.filename, None)
+        if not data_file:
+          logging.debug(u'Cache address: 0x{0:08x} missing data file.'.format(
+              cache_address.value))
+          break
+
+        try:
+          cache_entry = data_file.ReadCacheEntry(cache_address.block_offset)
+        except (IOError, UnicodeDecodeError) as exception:
+          logging.error(
+              u'Unable to parse cache entry with error: {0:s}'.format(
+                  exception))
+          break
+
+        event_object = ChromeCacheEntryEvent(cache_entry)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+        cache_address = cache_entry.next
+        cache_address_chain_length += 1
+
+    for data_block_file in data_block_files.itervalues():
+      if data_block_file:
+        data_block_file.Close()
+
+    index_file.Close()
+
+
+manager.ParsersManager.RegisterParser(ChromeCacheParser)
diff --git a/plaso/parsers/chrome_cache_test.py b/plaso/parsers/chrome_cache_test.py
new file mode 100644
index 0000000..1b14baa
--- /dev/null
+++ b/plaso/parsers/chrome_cache_test.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Chrome Cache files parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import chrome_cache as chrome_cache_formatter +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import chrome_cache + + +class ChromeCacheParserTest(test_lib.ParserTestCase): + """Tests for the Chrome Cache files parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = chrome_cache.ChromeCacheParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['chrome_cache', 'index']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 217) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-04-30 16:44:36.226091') + self.assertEqual(event_object.timestamp, expected_timestamp) + + expected_original_url = ( + u'https://s.ytimg.com/yts/imgbin/player-common-vfliLfqPT.webp') + self.assertEqual(event_object.original_url, expected_original_url) + + expected_string = u'Original URL: {0:s}'.format(expected_original_url) + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/context.py b/plaso/parsers/context.py new file mode 100644 index 0000000..793cdcf --- /dev/null +++ b/plaso/parsers/context.py @@ -0,0 +1,289 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The parser context object.""" + +import os + +from dfvfs.lib import definitions as dfvfs_definitions + +from plaso.lib import event +from plaso.lib import utils + + +class ParserContext(object): + """Class that implements the parser context.""" + + def __init__( + self, event_queue_producer, parse_error_queue_producer, knowledge_base): + """Initializes a parser context object. + + Args: + event_queue_producer: the event object queue producer (instance of + ItemQueueProducer). + parse_error_queue_producer: the parse error queue producer (instance of + ItemQueueProducer). + knowledge_base: A knowledge base object (instance of KnowledgeBase), + which contains information from the source data needed + for parsing. 
+ """ + super(ParserContext, self).__init__() + self._abort = False + self._event_queue_producer = event_queue_producer + self._filter_object = None + self._knowledge_base = knowledge_base + self._mount_path = None + self._parse_error_queue_producer = parse_error_queue_producer + self._text_prepend = None + + self.number_of_events = 0 + self.number_of_parse_errors = 0 + + @property + def abort(self): + """Read-only value to indicate the parsing should be aborted.""" + return self._abort + + @property + def codepage(self): + """The codepage.""" + return self._knowledge_base.codepage + + @property + def hostname(self): + """The hostname.""" + return self._knowledge_base.hostname + + @property + def knowledge_base(self): + """The knowledge base.""" + return self._knowledge_base + + @property + def platform(self): + """The platform.""" + return self._knowledge_base.platform + + @property + def timezone(self): + """The timezone object.""" + return self._knowledge_base.timezone + + @property + def year(self): + """The year.""" + return self._knowledge_base.year + + def GetDisplayName(self, file_entry): + """Retrieves the display name for the file entry. + + Args: + file_entry: a file entry object (instance of dfvfs.FileEntry). + + Returns: + A string containing the display name. + """ + relative_path = self.GetRelativePath(file_entry) + if not relative_path: + return file_entry.name + + return u'{0:s}:{1:s}'.format( + file_entry.path_spec.type_indicator, relative_path) + + def GetRelativePath(self, file_entry): + """Retrieves the relative path of the file entry. + + Args: + file_entry: a file entry object (instance of dfvfs.FileEntry). + + Returns: + A string containing the relative path or None. + """ + path_spec = getattr(file_entry, 'path_spec', None) + if not path_spec: + return + + # TODO: Solve this differently, quite possibly inside dfVFS using mount + # path spec. + file_path = getattr(path_spec, 'location', None) + + if path_spec.type_indicator != dfvfs_definitions.TYPE_INDICATOR_OS: + return file_path + + # If we are parsing a mount point we don't want to include the full + # path to file's location here, we are only interested in the relative + # path to the mount point. + if self._mount_path: + _, _, file_path = file_path.partition(self._mount_path) + + return file_path + + def MatchesFilter(self, event_object): + """Checks if the event object matces the filter. + + Args: + event_object: the event object (instance of EventObject). + + Returns: + A boolean value indicating if the event object matches the filter. + """ + return self._filter_object and self._filter_object.Matches(event_object) + + def ProcessEvent( + self, event_object, parser_chain=None, file_entry=None, query=None): + """Processes an event before it is emitted to the event queue. + + Args: + event_object: the event object (instance of EventObject). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + query: Optional query string. The default is None. + """ + if not getattr(event_object, 'parser', None) and parser_chain: + event_object.parser = parser_chain + + # TODO: deprecate text_prepend in favor of an event tag. 
+ if not getattr(event_object, 'text_prepend', None) and self._text_prepend: + event_object.text_prepend = self._text_prepend + + display_name = None + if file_entry: + event_object.pathspec = file_entry.path_spec + + if not getattr(event_object, 'filename', None): + event_object.filename = self.GetRelativePath(file_entry) + + if not display_name: + # TODO: dfVFS refactor: move display name to output since the path + # specification contains the full information. + display_name = self.GetDisplayName(file_entry) + + stat_object = file_entry.GetStat() + inode_number = getattr(stat_object, 'ino', None) + if not hasattr(event_object, 'inode') and inode_number: + # TODO: clean up the GetInodeValue function. + event_object.inode = utils.GetInodeValue(inode_number) + + if not getattr(event_object, 'display_name', None) and display_name: + event_object.display_name = display_name + + if not getattr(event_object, 'hostname', None) and self.hostname: + event_object.hostname = self.hostname + + if not getattr(event_object, 'username', None): + user_sid = getattr(event_object, 'user_sid', None) + username = self._knowledge_base.GetUsernameByIdentifier(user_sid) + if username: + event_object.username = username + + if not getattr(event_object, 'query', None) and query: + event_object.query = query + + def ProduceEvent( + self, event_object, parser_chain=None, file_entry=None, query=None): + """Produces an event onto the queue. + + Args: + event_object: the event object (instance of EventObject). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + query: Optional query string. The default is None. + """ + self.ProcessEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry, + query=query) + + if self.MatchesFilter(event_object): + return + + self._event_queue_producer.ProduceItem(event_object) + self.number_of_events += 1 + + def ProduceEvents( + self, event_objects, parser_chain=None, file_entry=None, query=None): + """Produces events onto the queue. + + Args: + event_objects: a list or generator of event objects (instances of + EventObject). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + query: Optional query string. The default is None. + """ + for event_object in event_objects: + self.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry, + query=query) + + def ProduceParseError(self, name, description, file_entry=None): + """Produces a parse error. + + Args: + name: The parser or plugin name. + description: The description of the error. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + """ + if self._parse_error_queue_producer: + path_spec = getattr(file_entry, 'path_spec', None) + parse_error = event.ParseError(name, description, path_spec=path_spec) + self._parse_error_queue_producer.ProduceItem(parse_error) + self.number_of_parse_errors += 1 + + def ResetCounters(self): + """Resets the counters.""" + self.number_of_events = 0 + self.number_of_parse_errors = 0 + + def SetFilterObject(self, filter_object): + """Sets the filter object. + + Args: + filter_object: the filter object (instance of objectfilter.Filter). 
+ """ + self._filter_object = filter_object + + def SetMountPath(self, mount_path): + """Sets the mount path. + + Args: + mount_path: string containing the mount path. + """ + # Remove a trailing path separator from the mount path so the relative + # paths will start with a path separator. + if mount_path and mount_path.endswith(os.sep): + mount_path = mount_path[:-1] + + self._mount_path = mount_path + + def SetTextPrepend(self, text_prepend): + """Sets the text prepend. + + Args: + text_prepend: string that contains the text to prepend to every event. + """ + self._text_prepend = text_prepend + + def SignalAbort(self): + """Signals the parsers to abort.""" + self._abort = True diff --git a/plaso/parsers/cookie_plugins/__init__.py b/plaso/parsers/cookie_plugins/__init__.py new file mode 100644 index 0000000..e014011 --- /dev/null +++ b/plaso/parsers/cookie_plugins/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each browser cookie plugin.""" + +from plaso.parsers.cookie_plugins import ganalytics diff --git a/plaso/parsers/cookie_plugins/ganalytics.py b/plaso/parsers/cookie_plugins/ganalytics.py new file mode 100644 index 0000000..175f0d8 --- /dev/null +++ b/plaso/parsers/cookie_plugins/ganalytics.py @@ -0,0 +1,221 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a plugin for parsing Google Analytics cookies.""" + +import urllib + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers.cookie_plugins import interface + + +class GoogleAnalyticsEvent(time_events.PosixTimeEvent): + """A simple placeholder for a Google Analytics event.""" + + DATA_TYPE = u'cookie:google:analytics' + + def __init__( + self, timestamp, timestamp_desc, url, data_type_append, cookie_name, + **kwargs): + """Initialize a Google Analytics event. + + Args: + timestamp: The timestamp in a POSIX format. + timestamp_desc: A string describing the timestamp. + url: The full URL where the cookie got set. + data_type_append: String to append to the data type. + cookie_name: The name of the cookie. 
+ """ + super(GoogleAnalyticsEvent, self).__init__( + timestamp, timestamp_desc, u'{0:s}:{1:s}'.format( + self.DATA_TYPE, data_type_append)) + + self.url = url + self.cookie_name = cookie_name + + for key, value in kwargs.iteritems(): + setattr(self, key, value) + + +class GoogleAnalyticsUtmzPlugin(interface.CookiePlugin): + """A browser cookie plugin for Google Analytics cookies.""" + + NAME = 'google_analytics_utmz' + + COOKIE_NAME = u'__utmz' + + # Point to few sources for URL information. + URLS = [ + (u'http://www.dfinews.com/articles/2012/02/' + u'google-analytics-cookies-and-forensic-implications')] + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, + cookie_data=None, url=None, **unused_kwargs): + """Extracts event objects from the cookie. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + cookie_data: The cookie data, as a byte string. + url: The full URL or path where the cookie got set. + """ + # The structure of the field: + # .... + fields = cookie_data.split('.') + + if len(fields) > 5: + variables = u'.'.join(fields[4:]) + fields = fields[0:4] + fields.append(variables) + + if len(fields) != 5: + raise errors.WrongPlugin(u'Wrong number of fields. [{0:d} vs. 5]'.format( + len(fields))) + + domain_hash, last, sessions, sources, variables = fields + extra_variables = variables.split(u'|') + + kwargs = {} + for variable in extra_variables: + key, _, value = variable.partition(u'=') + try: + value_line = unicode(urllib.unquote(str(value)), 'utf-8') + except UnicodeDecodeError: + value_line = repr(value) + + kwargs[key] = value_line + + event_object = GoogleAnalyticsEvent( + int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME, + url, 'utmz', self.COOKIE_NAME, domain_hash=domain_hash, + sessions=int(sessions, 10), sources=int(sources, 10), + **kwargs) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class GoogleAnalyticsUtmaPlugin(interface.CookiePlugin): + """A browser cookie plugin for Google Analytics cookies.""" + + NAME = 'google_analytics_utma' + + COOKIE_NAME = u'__utma' + + # Point to few sources for URL information. + URLS = [ + (u'http://www.dfinews.com/articles/2012/02/' + u'google-analytics-cookies-and-forensic-implications')] + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, + cookie_data=None, url=None, **unused_kwargs): + """Extracts event objects from the cookie. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + cookie_data: The cookie data, as a byte string. + url: The full URL or path where the cookie got set. + """ + # Values has the structure of: + # .....<# of + # sessions> + fields = cookie_data.split(u'.') + + # Check for a valid record. + if len(fields) != 6: + raise errors.WrongPlugin(u'Wrong number of fields. [{0:d} vs. 6]'.format( + len(fields))) + + domain_hash, visitor_id, first_visit, previous, last, sessions = fields + + # TODO: Double check this time is stored in UTC and not local time. 
+    first_epoch = int(first_visit, 10)
+    event_object = GoogleAnalyticsEvent(
+        first_epoch, 'Analytics Creation Time', url, 'utma', self.COOKIE_NAME,
+        domain_hash=domain_hash, visitor_id=visitor_id,
+        sessions=int(sessions, 10))
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    event_object = GoogleAnalyticsEvent(
+        int(previous, 10), 'Analytics Previous Time', url, 'utma',
+        self.COOKIE_NAME, domain_hash=domain_hash, visitor_id=visitor_id,
+        sessions=int(sessions, 10))
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    event_object = GoogleAnalyticsEvent(
+        int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME,
+        url, 'utma', self.COOKIE_NAME, domain_hash=domain_hash,
+        visitor_id=visitor_id, sessions=int(sessions, 10))
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+class GoogleAnalyticsUtmbPlugin(interface.CookiePlugin):
+  """A browser cookie plugin for Google Analytics cookies."""
+
+  NAME = 'google_analytics_utmb'
+
+  COOKIE_NAME = u'__utmb'
+
+  # Point to a few sources for URL information.
+  URLS = [
+      (u'http://www.dfinews.com/articles/2012/02/'
+       u'google-analytics-cookies-and-forensic-implications')]
+
+  def GetEntries(
+      self, parser_context, file_entry=None, parser_chain=None,
+      cookie_data=None, url=None, **unused_kwargs):
+    """Extracts event objects from the cookie.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      cookie_data: The cookie data, as a byte string.
+      url: The full URL or path where the cookie got set.
+    """
+    # The value has the structure of:
+    #   <domain hash>.<pages viewed>.10.<last time>
+    fields = cookie_data.split(u'.')
+
+    # Check for a valid record.
+    if len(fields) != 4:
+      raise errors.WrongPlugin(u'Wrong number of fields. [{0:d} vs. 4]'.format(
+          len(fields)))
+
+    domain_hash, pages_viewed, _, last = fields
+
+    event_object = GoogleAnalyticsEvent(
+        int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME,
+        url, 'utmb', self.COOKIE_NAME, domain_hash=domain_hash,
+        pages_viewed=int(pages_viewed, 10))
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
diff --git a/plaso/parsers/cookie_plugins/ganalytics_test.py b/plaso/parsers/cookie_plugins/ganalytics_test.py
new file mode 100644
index 0000000..e6a531c
--- /dev/null
+++ b/plaso/parsers/cookie_plugins/ganalytics_test.py
@@ -0,0 +1,139 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Google Analytics cookies.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import ganalytics as ganalytics_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers.cookie_plugins import ganalytics +from plaso.parsers.sqlite_plugins import chrome_cookies +from plaso.parsers.sqlite_plugins import firefox_cookies +from plaso.parsers.sqlite_plugins import test_lib + + +class GoogleAnalyticsPluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Google Analytics plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + + def _GetAnalyticsCookies(self, event_queue_consumer): + """Return a list of analytics cookies.""" + cookies = [] + for event_object in self._GetEventObjectsFromQueue(event_queue_consumer): + if isinstance(event_object, ganalytics.GoogleAnalyticsEvent): + cookies.append(event_object) + return cookies + + def testParsingFirefox29CookieDatabase(self): + """Tests the Process function on a Firefox 29 cookie database file.""" + plugin = firefox_cookies.FirefoxCookiePlugin() + test_file = self._GetTestFilePath(['firefox_cookies.sqlite']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin(plugin, test_file) + event_objects = self._GetAnalyticsCookies(event_queue_consumer) + + self.assertEquals(len(event_objects), 25) + + event_object = event_objects[14] + + self.assertEquals( + event_object.utmcct, + u'/frettir/erlent/2013/10/30/maelt_med_kerfisbundnum_hydingum/') + self.assertEquals( + event_object.timestamp, timelib_test.CopyStringToTimestamp( + '2013-10-30 21:56:06')) + self.assertEquals(event_object.url, u'http://ads.aha.is/') + self.assertEquals(event_object.utmcsr, u'mbl.is') + + expected_msg = ( + u'http://ads.aha.is/ (__utmz) Sessions: 1 Domain Hash: 137167072 ' + u'Sources: 1 Last source used to access: mbl.is Ad campaign ' + u'information: (referral) Last type of visit: referral Path to ' + u'the page of referring link: /frettir/erlent/2013/10/30/' + u'maelt_med_kerfisbundnum_hydingum/') + + self._TestGetMessageStrings( + event_object, expected_msg, u'http://ads.aha.is/ (__utmz)') + + def testParsingChromeCookieDatabase(self): + """Test the process function on a Chrome cookie database.""" + plugin = chrome_cookies.ChromeCookiePlugin() + test_file = self._GetTestFilePath(['cookies.db']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin(plugin, test_file) + event_objects = self._GetAnalyticsCookies(event_queue_consumer) + + # The cookie database contains 560 entries in total. Out of them + # there are 75 events created by the Google Analytics plugin. + self.assertEquals(len(event_objects), 75) + # Check few "random" events to verify. + + # Check an UTMZ Google Analytics event. + event_object = event_objects[39] + self.assertEquals(event_object.utmctr, u'enders game') + self.assertEquals(event_object.domain_hash, u'68898382') + self.assertEquals(event_object.sessions, 1) + + expected_msg = ( + u'http://imdb.com/ (__utmz) Sessions: 1 Domain Hash: 68898382 ' + u'Sources: 1 Last source used to access: google Ad campaign ' + u'information: (organic) Last type of visit: organic Keywords ' + u'used to find site: enders game') + self._TestGetMessageStrings( + event_object, expected_msg, u'http://imdb.com/ (__utmz)') + + # Check the UTMA Google Analytics event. 
+ event_object = event_objects[41] + self.assertEquals(event_object.timestamp_desc, u'Analytics Previous Time') + self.assertEquals(event_object.cookie_name, u'__utma') + self.assertEquals(event_object.visitor_id, u'1827102436') + self.assertEquals(event_object.sessions, 2) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-22 01:55:29') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'http://assets.tumblr.com/ (__utma) Sessions: 2 Domain Hash: ' + u'151488169 Visitor ID: 151488169') + self._TestGetMessageStrings( + event_object, expected_msg, u'http://assets.tumblr.com/ (__utma)') + + # Check the UTMB Google Analytics event. + event_object = event_objects[34] + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_VISITED_TIME) + self.assertEquals(event_object.cookie_name, u'__utmb') + self.assertEquals(event_object.domain_hash, u'154523900') + self.assertEquals(event_object.pages_viewed, 1) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-22 01:48:30') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'http://upressonline.com/ (__utmb) Pages Viewed: 1 Domain Hash: ' + u'154523900') + self._TestGetMessageStrings( + event_object, expected_msg, u'http://upressonline.com/ (__utmb)') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/cookie_plugins/interface.py b/plaso/parsers/cookie_plugins/interface.py new file mode 100644 index 0000000..3e03c53 --- /dev/null +++ b/plaso/parsers/cookie_plugins/interface.py @@ -0,0 +1,115 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an interface for browser cookie plugins.""" + +import abc + +from plaso.lib import errors +from plaso.lib import registry +from plaso.parsers import plugins + + +# TODO: move this into the parsers and plugins manager. +def GetPlugins(): + """Returns a list of all cookie plugins.""" + plugins_list = [] + for plugin_cls in CookiePlugin.classes.itervalues(): + parent_name = getattr(plugin_cls, 'parent_class_name', 'NOTHERE') + if parent_name != 'cookie': + continue + + plugins_list.append(plugin_cls()) + + return plugins_list + + +class CookiePlugin(plugins.BasePlugin): + """A browser cookie plugin for Plaso. + + This is a generic cookie parsing interface that can handle parsing + cookies from all browsers. + """ + __metaclass__ = registry.MetaclassRegistry + __abstract = True + + NAME = 'cookie' + + # The name of the cookie value that this plugin is designed to parse. + # This value is used to evaluate whether the plugin is the correct one + # to parse the browser cookie. 
+  COOKIE_NAME = u''
+
+  def __init__(self):
+    """Initialize the browser cookie plugin."""
+    super(CookiePlugin, self).__init__()
+    self.cookie_data = ''
+
+  @abc.abstractmethod
+  def GetEntries(
+      self, parser_context, file_entry=None, parser_chain=None,
+      cookie_data=None, url=None, **kwargs):
+    """Extracts event objects from the cookie data and produces them.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      cookie_data: Optional cookie data, as a byte string.
+      url: Optional URL or path where the cookie got set.
+    """
+
+  def Process(
+      self, parser_context, file_entry=None, parser_chain=None,
+      cookie_name=None, cookie_data=None, url=None, **kwargs):
+    """Determine if this is the right plugin for this cookie.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      cookie_name: The name of the cookie value.
+      cookie_data: The cookie data, as a byte string.
+      url: The full URL or path where the cookie got set.
+
+    Raises:
+      errors.WrongPlugin: If the cookie name differs from the one
+                          supplied in COOKIE_NAME.
+      ValueError: If cookie_name or cookie_data are not set.
+    """
+    if cookie_name is None or cookie_data is None:
+      raise ValueError(u'Cookie name or data are not set.')
+
+    if cookie_name != self.COOKIE_NAME:
+      raise errors.WrongPlugin(
+          u'Not the correct cookie plugin for: {0:s} [{1:s}]'.format(
+              cookie_name, self.NAME))
+
+    # This will raise if unhandled keyword arguments are passed.
+    super(CookiePlugin, self).Process(parser_context, **kwargs)
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    self.GetEntries(
+        parser_context, file_entry=file_entry, parser_chain=parser_chain,
+        cookie_data=cookie_data, url=url)
diff --git a/plaso/parsers/cookie_plugins/test_lib.py b/plaso/parsers/cookie_plugins/test_lib.py
new file mode 100644
index 0000000..76e50a7
--- /dev/null
+++ b/plaso/parsers/cookie_plugins/test_lib.py
@@ -0,0 +1,24 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Browser cookie plugin related functions and classes for testing.""" + +from plaso.parsers import test_lib + + +class CookiePluginTestCase(test_lib.ParserTestCase): + """The unit test case for a browser cookie plugin.""" diff --git a/plaso/parsers/cups_ipp.py b/plaso/parsers/cups_ipp.py new file mode 100644 index 0000000..dcea9a9 --- /dev/null +++ b/plaso/parsers/cups_ipp.py @@ -0,0 +1,345 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The CUPS IPP Control Files Parser. + +CUPS IPP version 1.0: +* http://tools.ietf.org/html/rfc2565 +* http://tools.ietf.org/html/rfc2566 +* http://tools.ietf.org/html/rfc2567 +* http://tools.ietf.org/html/rfc2568 +* http://tools.ietf.org/html/rfc2569 +* http://tools.ietf.org/html/rfc2639 + +CUPS IPP version 1.1: +* http://tools.ietf.org/html/rfc2910 +* http://tools.ietf.org/html/rfc2911 +* http://tools.ietf.org/html/rfc3196 +* http://tools.ietf.org/html/rfc3510 + +CUPS IPP version 2.0: +* N/A +""" + +import construct +import logging +import os + +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +# TODO: RFC Pendings types: resolution, dateTime, rangeOfInteger. +# "dateTime" is not used by Mac OS, instead it uses integer types. +# TODO: Only tested against CUPS IPP Mac OS X. + + +class CupsIppEvent(event.EventObject): + """Convenience class for an cups ipp event.""" + + DATA_TYPE = 'cups:ipp:event' + + def __init__( + self, timestamp, timestamp_desc, data_dict): + """Initializes the event object. + + Args: + timestamp: Timestamp of the entry. + timestamp_desc: Description of the timestamp. + data_dict: Dictionary with all the pairs coming from IPP file. + user: String with the system user name. + owner: String with the real name of the user. + computer_name: String with the name of the computer. + printer_id: String with the identification name of the print. + uri: String with the URL of the CUPS service. + job_id: String with the identification id of the job. + job_name: String with the job name. + copies: Integer with the number of copies. + application: String with the application that prints the document. + doc_usingtype: String with the type of document. + data_dict: Dictionary with all the parsed data comming from the file. + """ + super(CupsIppEvent, self).__init__() + self.timestamp = timelib.Timestamp.FromPosixTime(timestamp) + self.timestamp_desc = timestamp_desc + # TODO: Find a better solution than to have join for each attribute. 
+    self.user = self._ListToString(data_dict.get('user', None))
+    self.owner = self._ListToString(data_dict.get('owner', None))
+    self.computer_name = self._ListToString(data_dict.get(
+        'computer_name', None))
+    self.printer_id = self._ListToString(data_dict.get('printer_id', None))
+    self.uri = self._ListToString(data_dict.get('uri', None))
+    self.job_id = self._ListToString(data_dict.get('job_id', None))
+    self.job_name = self._ListToString(data_dict.get('job_name', None))
+    self.copies = data_dict.get('copies', 0)[0]
+    self.application = self._ListToString(data_dict.get('application', None))
+    self.doc_type = self._ListToString(data_dict.get('doc_type', None))
+    self.data_dict = data_dict
+
+  def _ListToString(self, values):
+    """Returns a string from a list value using comma as a delimiter.
+
+    If any value inside the list contains comma, which is the delimiter,
+    the entire field is surrounded with double quotes.
+
+    Args:
+      values: A list or tuple containing the values.
+
+    Returns:
+      A string containing all the values joined using comma as a delimiter
+      or None.
+    """
+    if values is None:
+      return
+
+    if type(values) not in (list, tuple):
+      return
+
+    for index, value in enumerate(values):
+      if ',' in value:
+        values[index] = u'"{0:s}"'.format(value)
+
+    try:
+      return u', '.join(values)
+    except UnicodeDecodeError as exception:
+      logging.error(
+          u'Unable to join values with error: {0:s}'.format(exception))
+
+
+class CupsIppParser(interface.BaseParser):
+  """Parser for CUPS IPP files."""
+
+  NAME = 'cups_ipp'
+  DESCRIPTION = u'Parser for CUPS IPP files.'
+
+  # INFO:
+  # For each file, we have only one document with three different timestamps:
+  # created, processed and finished.
+  # Format:
+  # [HEADER: MAGIC + KNOWN_TYPE][GROUP A]...[GROUP Z][GROUP_END: 0x03]
+  # GROUP: [GROUP ID][PAIR A]...[PAIR Z] where [PAIR: NAME + VALUE]
+  # GROUP ID: [1byte ID]
+  # PAIR: [TagID][\x00][Name][Value])
+  # TagID: 1 byte integer with the type of "Value".
+  # Name: [Length][Text][\00]
+  # Name can be empty when the name has more than one value.
+  # Example: family name "lopez mata" with more than one surname.
+  # Type_Text + [0x06, family, 0x00] + [0x05, lopez, 0x00] +
+  # Type_Text + [0x00, 0x00] + [0x04, mata, 0x00]
+  # Value: can be integer, boolean, or text provided by TagID.
+  # If boolean, Value: [\x01][0x00(False)] or [\x01(True)]
+  # If integer, Value: [\x04][Integer]
+  # If text, Value: [Length text][Text][\00]
+
+  # Magic numbers that identify the supported CUPS IPP version.
+  IPP_MAJOR_VERSION = 2
+  IPP_MINOR_VERSION = 0
+  # Supported Operation ID.
+  IPP_OP_ID = 5
+
+  # CUPS IPP File header.
+  CUPS_IPP_HEADER = construct.Struct(
+      'cups_ipp_header_struct',
+      construct.UBInt8('major_version'),
+      construct.UBInt8('minor_version'),
+      construct.UBInt16('operation_id'),
+      construct.UBInt32('request_id'))
+
+  # Group ID that indicates the end of the IPP Control file.
+  GROUP_END = 3
+  # Identification Groups.
+  GROUP_LIST = [1, 2, 4, 5, 6, 7]
+
+  # Type ID.
+  TYPE_GENERAL_INTEGER = 32
+  TYPE_INTEGER = 33
+  TYPE_ENUMERATION = 35
+  TYPE_BOOL = 34
+
+  # Type of values that can be extracted.
+  INTEGER_8 = construct.UBInt8('integer')
+  INTEGER_32 = construct.UBInt32('integer')
+  TEXT = construct.PascalString(
+      'text',
+      length_field=construct.UBInt8('length'))
+  BOOLEAN = construct.Struct(
+      'boolean_value',
+      construct.Padding(1),
+      INTEGER_8)
+  INTEGER = construct.Struct(
+      'integer_value',
+      construct.Padding(1),
+      INTEGER_32)
+
+  # Name of the pair.
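+  # A pair name is stored as a Pascal string (a 1-byte length followed by
+  # the text) plus a trailing 0x00 byte, e.g. (illustrative)
+  # '\x08job-uuid\x00' encodes the name 'job-uuid'.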
+  PAIR_NAME = construct.Struct(
+      'pair_name',
+      TEXT,
+      construct.Padding(1))
+
+  # Specific CUPS IPP to generic name.
+  NAME_PAIR_TRANSLATION = {
+      'printer-uri': u'uri',
+      'job-uuid': u'job_id',
+      'DestinationPrinterID': u'printer_id',
+      'job-originating-user-name': u'user',
+      'job-name': u'job_name',
+      'document-format': u'doc_type',
+      'job-originating-host-name': u'computer_name',
+      'com.apple.print.JobInfo.PMApplicationName': u'application',
+      'com.apple.print.JobInfo.PMJobOwner': u'owner'}
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract an entry from a CUPS IPP file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Raises:
+      UnableToParseFile: when the file cannot be parsed.
+    """
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    file_object = file_entry.GetFileObject()
+    file_object.seek(0, os.SEEK_SET)
+
+    try:
+      header = self.CUPS_IPP_HEADER.parse_stream(file_object)
+    except (IOError, construct.FieldError) as exception:
+      file_object.close()
+      raise errors.UnableToParseFile(
+          u'Unable to parse CUPS IPP Header with error: {0:s}'.format(
+              exception))
+
+    if (header.major_version != self.IPP_MAJOR_VERSION or
+        header.minor_version != self.IPP_MINOR_VERSION):
+      file_object.close()
+      raise errors.UnableToParseFile(
+          u'[{0:s}] Unsupported version number.'.format(self.NAME))
+
+    if header.operation_id != self.IPP_OP_ID:
+      # Warn if the operation ID differs from the standard one. We should be
+      # able to parse the file nonetheless.
+      logging.debug(
+          u'[{0:s}] Unsupported operation identifier in file: {1:s}.'.format(
+              self.NAME, parser_context.GetDisplayName(file_entry)))
+
+    # Read the pairs, extracting the name and the value.
+    data_dict = {}
+    name, value = self.ReadPair(parser_context, file_entry, file_object)
+    while name or value:
+      # Translate the known "name" CUPS IPP to a generic name value.
+      pretty_name = self.NAME_PAIR_TRANSLATION.get(name, name)
+      data_dict.setdefault(pretty_name, []).append(value)
+      name, value = self.ReadPair(parser_context, file_entry, file_object)
+
+    if u'time-at-creation' in data_dict:
+      event_object = CupsIppEvent(
+          data_dict['time-at-creation'][0],
+          eventdata.EventTimestamp.CREATION_TIME, data_dict)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    if u'time-at-processing' in data_dict:
+      event_object = CupsIppEvent(
+          data_dict['time-at-processing'][0],
+          eventdata.EventTimestamp.START_TIME, data_dict)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    if u'time-at-completed' in data_dict:
+      event_object = CupsIppEvent(
+          data_dict['time-at-completed'][0],
+          eventdata.EventTimestamp.END_TIME, data_dict)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    file_object.close()
+
+  def ReadPair(self, parser_context, file_entry, file_object):
+    """Reads an attribute name and value pair from a CUPS IPP event.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      file_object: a file-like object that points to a file.
+
+    Returns:
+      A tuple containing the name and value. If the name and value cannot
+      be read both are set to None.
+    """
+    # Pair = Type ID + Name + Value.
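+    # Illustrative byte layout (made-up values): a boolean pair could be
+    # stored as '\x22\x00\x03foo\x00\x01\x01', i.e. tag 0x22 (boolean), a
+    # 0x00 separator, the name 'foo' (length 3), then a 1-byte length
+    # followed by the value True.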
+    try:
+      # Can be:
+      # Group ID + IDtag = Group ID (1byte) + Tag ID (1byte) + '0x00'.
+      # IDtag = Tag ID (1byte) + '0x00'.
+      type_id = self.INTEGER_8.parse_stream(file_object)
+      if type_id == self.GROUP_END:
+        return None, None
+
+      elif type_id in self.GROUP_LIST:
+        # If it is a group ID we must read the next byte that contains
+        # the first TagID.
+        type_id = self.INTEGER_8.parse_stream(file_object)
+
+      # 0x00 separator character.
+      _ = self.INTEGER_8.parse_stream(file_object)
+
+    except (IOError, construct.FieldError):
+      logging.warning(
+          u'[{0:s}] Unsupported identifier in file: {1:s}.'.format(
+              self.NAME, parser_context.GetDisplayName(file_entry)))
+      return None, None
+
+    # Name = Length name + name + 0x00
+    try:
+      name = self.PAIR_NAME.parse_stream(file_object).text
+    except (IOError, construct.FieldError):
+      logging.warning(
+          u'[{0:s}] Unsupported name in file: {1:s}.'.format(
+              self.NAME, parser_context.GetDisplayName(file_entry)))
+      return None, None
+
+    # Value: can be integer, boolean or text, selected by the Type ID.
+    try:
+      if type_id in [
+          self.TYPE_GENERAL_INTEGER, self.TYPE_INTEGER, self.TYPE_ENUMERATION]:
+        value = self.INTEGER.parse_stream(file_object).integer
+
+      elif type_id == self.TYPE_BOOL:
+        value = bool(self.BOOLEAN.parse_stream(file_object).integer)
+
+      else:
+        value = self.TEXT.parse_stream(file_object)
+
+    except (IOError, construct.FieldError):
+      logging.warning(
+          u'[{0:s}] Unsupported value in file: {1:s}.'.format(
+              self.NAME, parser_context.GetDisplayName(file_entry)))
+      return None, None
+
+    return name, value
+
+
+manager.ParsersManager.RegisterParser(CupsIppParser)
diff --git a/plaso/parsers/cups_ipp_test.py b/plaso/parsers/cups_ipp_test.py
new file mode 100644
index 0000000..d614da2
--- /dev/null
+++ b/plaso/parsers/cups_ipp_test.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser test for Mac Cups IPP Log files.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import cups_ipp as cups_ipp_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import cups_ipp +from plaso.parsers import test_lib + + +class CupsIppParserTest(test_lib.ParserTestCase): + """The unit test for Mac Cups IPP parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = cups_ipp.CupsIppParser() + + def testParse(self): + """Tests the Parse function.""" + # TODO: only tested against Mac OS X Cups IPP (Version 2.0) + test_file = self._GetTestFilePath(['mac_cups_ipp']) + events = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(events) + + self.assertEqual(len(event_objects), 3) + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-03 18:07:21') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual( + event_object.timestamp_desc, + eventdata.EventTimestamp.CREATION_TIME) + self.assertEqual(event_object.application, u'LibreOffice') + self.assertEqual(event_object.job_name, u'Assignament 1') + self.assertEqual(event_object.computer_name, u'localhost') + self.assertEqual(event_object.copies, 1) + self.assertEqual(event_object.doc_type, u'application/pdf') + expected_job = u'urn:uuid:d51116d9-143c-3863-62aa-6ef0202de49a' + self.assertEqual(event_object.job_id, expected_job) + self.assertEqual(event_object.owner, u'Joaquin Moreno Garijo') + self.assertEqual(event_object.user, u'moxilo') + self.assertEqual(event_object.printer_id, u'RHULBW') + expected_uri = u'ipp://localhost:631/printers/RHULBW' + self.assertEqual(event_object.uri, expected_uri) + expected_msg = ( + u'User: moxilo ' + u'Owner: Joaquin Moreno Garijo ' + u'Job Name: Assignament 1 ' + u'Application: LibreOffice ' + u'Printer: RHULBW') + expected_msg_short = ( + u'Job Name: Assignament 1') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-03 18:07:21') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual( + event_object.timestamp_desc, + eventdata.EventTimestamp.START_TIME) + + event_object = event_objects[2] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-03 18:07:32') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual( + event_object.timestamp_desc, + eventdata.EventTimestamp.END_TIME) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/custom_destinations.py b/plaso/parsers/custom_destinations.py new file mode 100644 index 0000000..730c285 --- /dev/null +++ b/plaso/parsers/custom_destinations.py @@ -0,0 +1,212 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for .customDestinations-ms files.""" + +import logging +import os + +import construct +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver + +from plaso.lib import errors +from plaso.parsers import interface +from plaso.parsers import manager +from plaso.parsers import winlnk + + +class CustomDestinationsParser(interface.BaseParser): + """Parses .customDestinations-ms files.""" + + NAME = 'custom_destinations' + DESCRIPTION = u'Parser for *.customDestinations-ms files.' + + # We cannot use the parser registry here since winlnk could be disabled. + # TODO: see if there is a more elegant solution for this. + _WINLNK_PARSER = winlnk.WinLnkParser() + + _LNK_GUID = '\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x46' + + _FILE_HEADER = construct.Struct( + 'file_header', + construct.ULInt32('unknown1'), + construct.ULInt32('unknown2'), + construct.ULInt32('unknown3'), + construct.ULInt32('header_values_type')) + + _HEADER_VALUE_TYPE_0 = construct.Struct( + 'header_value_type_0', + construct.ULInt32('number_of_characters'), + construct.String('string', lambda ctx: ctx.number_of_characters * 2), + construct.ULInt32('unknown1')) + + _HEADER_VALUE_TYPE_1_OR_2 = construct.Struct( + 'header_value_type_1_or_2', + construct.ULInt32('unknown1')) + + _ENTRY_HEADER = construct.Struct( + 'entry_header', + construct.String('guid', 16)) + + _FILE_FOOTER = construct.Struct( + 'file_footer', + construct.ULInt32('signature')) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from an *.customDestinations-ms file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + self.ParseFileObject( + parser_context, file_object, file_entry=file_entry, + parser_chain=parser_chain) + file_object.close() + + def ParseFileObject( + self, parser_context, file_object, file_entry=None, parser_chain=None): + """Extract data from an *.customDestinations-ms file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_object: A file-like object. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. 
+ """ + parser_chain = self._BuildParserChain(parser_chain) + + try: + file_header = self._FILE_HEADER.parse_stream(file_object) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile(( + u'Unable to parse Custom Destination file header with error: ' + u'{0:s}').format(exception)) + + if file_header.unknown1 != 2: + raise errors.UnableToParseFile(( + u'Unsupported Custom Destination file - invalid unknown1: ' + u'{0:d}.').format(file_header.unknown1)) + + if file_header.header_values_type > 2: + raise errors.UnableToParseFile(( + u'Unsupported Custom Destination file - invalid header value type: ' + u'{0:d}.').format(file_header.header_values_type)) + + if file_header.header_values_type == 0: + data_structure = self._HEADER_VALUE_TYPE_0 + else: + data_structure = self._HEADER_VALUE_TYPE_1_OR_2 + + try: + _ = data_structure.parse_stream(file_object) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile(( + u'Unable to parse Custom Destination file header value with error: ' + u'{0:s}').format(exception)) + + file_size = file_object.get_size() + file_offset = file_object.get_offset() + remaining_file_size = file_size - file_offset + + # The Custom Destination file does not have a unique signature in + # the file header that is why we use the first LNK class identifier (GUID) + # as a signature. + first_guid_checked = False + while remaining_file_size > 4: + try: + entry_header = self._ENTRY_HEADER.parse_stream(file_object) + except (IOError, construct.FieldError) as exception: + if not first_guid_checked: + raise errors.UnableToParseFile(( + u'Unable to parse Custom Destination file entry header with ' + u'error: {0:s}').format(exception)) + else: + logging.warning(( + u'Unable to parse Custom Destination file entry header with ' + u'error: {0:s}').format(exception)) + break + + if entry_header.guid != self._LNK_GUID: + if not first_guid_checked: + raise errors.UnableToParseFile( + u'Unsupported Custom Destination file - invalid entry header.') + else: + logging.warning( + u'Unsupported Custom Destination file - invalid entry header.') + break + + first_guid_checked = True + file_offset += 16 + remaining_file_size -= 16 + + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_DATA_RANGE, range_offset=file_offset, + range_size=remaining_file_size, parent=file_entry.path_spec) + + try: + lnk_file_object = resolver.Resolver.OpenFileObject(path_spec) + except RuntimeError as exception: + logging.error(( + u'[{0:s}] Unable to open LNK file from {1:s} with error: ' + u'{2:s}').format( + parser_chain, + file_entry.path_spec.comparable.replace(u'\n', u';'), + exception)) + return + + display_name = u'{0:s} # 0x{1:08x}'.format( + parser_context.GetDisplayName(file_entry), file_offset) + + self._WINLNK_PARSER.ParseFileObject( + parser_context, lnk_file_object, file_entry=file_entry, + parser_chain=parser_chain, display_name=display_name) + + # We cannot trust the file size in the LNK data so we get the last offset + # that was read instead. 
+      lnk_file_size = lnk_file_object.get_offset()
+
+      lnk_file_object.close()
+
+      file_offset += lnk_file_size
+      remaining_file_size -= lnk_file_size
+
+      file_object.seek(file_offset, os.SEEK_SET)
+
+    try:
+      file_footer = self._FILE_FOOTER.parse_stream(file_object)
+    except (IOError, construct.FieldError) as exception:
+      file_footer = None
+      logging.warning((
+          u'Unable to parse Custom Destination file footer with error: '
+          u'{0:s}').format(exception))
+
+    if file_footer and file_footer.signature != 0xbabffbab:
+      logging.warning(
+          u'Unsupported Custom Destination file - invalid footer signature.')
+
+
+manager.ParsersManager.RegisterParser(CustomDestinationsParser)
diff --git a/plaso/parsers/custom_destinations_test.py b/plaso/parsers/custom_destinations_test.py
new file mode 100644
index 0000000..c22572a
--- /dev/null
+++ b/plaso/parsers/custom_destinations_test.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the .customDestinations-ms file parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import winlnk as winlnk_formatter
+from plaso.lib import eventdata
+from plaso.lib import timelib_test
+from plaso.parsers import custom_destinations
+from plaso.parsers import test_lib
+
+
+class CustomDestinationsParserTest(test_lib.ParserTestCase):
+  """Tests for the .customDestinations-ms file parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = custom_destinations.CustomDestinationsParser()
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath([
+        u'5afe4de1b92fc382.customDestinations-ms'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEqual(len(event_objects), 108)
+
+    # A shortcut event object.
+    # The last accessed timestamp.
+    event_object = event_objects[105]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2009-07-13 23:55:56.248103')
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.ACCESS_TIME)
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    # The creation timestamp.
+    event_object = event_objects[106]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2009-07-13 23:55:56.248103')
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME)
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    # The last modification timestamp.
+ event_object = event_objects[107] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-07-14 01:39:11.388000') + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.MODIFICATION_TIME) + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[@%systemroot%\\system32\\oobefldr.dll,-1262] ' + u'File size: 11776 ' + u'File attribute flags: 0x00000020 ' + u'Drive type: 3 ' + u'Drive serial number: 0x24ba718b ' + u'Local path: C:\\Windows\\System32\\GettingStarted.exe ' + u'cmd arguments: {DE3895CB-077B-4C38-B6E3-F3DE1E0D84FC} ' + u'%systemroot%\\system32\\control.exe /name Microsoft.Display ' + u'env location: %SystemRoot%\\system32\\GettingStarted.exe ' + u'Icon location: %systemroot%\\system32\\display.dll ' + u'Link target: [My Computer, C:\\, Windows, System32, ' + u'GettingStarted.exe]') + + expected_msg_short = ( + u'[@%systemroot%\\system32\\oobefldr.dll,-1262] ' + u'C:\\Windows\\System32\\GettingStarte...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # A shell item event object. + event_object = event_objects[16] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-11-10 07:41:04') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Name: System32 ' + u'Long name: System32 ' + u'NTFS file reference: 2331-1 ' + u'Origin: 5afe4de1b92fc382.customDestinations-ms') + + expected_msg_short = ( + u'Name: System32 ' + u'NTFS file reference: 2331-1 ' + u'Origin: 5afe4de1b92fc382.customDes...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/esedb.py b/plaso/parsers/esedb.py new file mode 100644 index 0000000..6613795 --- /dev/null +++ b/plaso/parsers/esedb.py @@ -0,0 +1,98 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Extensible Storage Engine (ESE) database files (EDB).""" + +import logging + +import pyesedb + +from plaso.lib import errors +from plaso.parsers import interface +from plaso.parsers import manager +from plaso.parsers import plugins + + +if pyesedb.get_version() < '20140301': + raise ImportWarning(u'EseDbParser requires at least pyesedb 20140301.') + + +class EseDbCache(plugins.BasePluginCache): + """A cache storing query results for ESEDB plugins.""" + + +class EseDbParser(interface.BasePluginsParser): + """Parses Extensible Storage Engine (ESE) database files (EDB).""" + + NAME = 'esedb' + DESCRIPTION = u'Parser for Extensible Storage Engine (ESE) database files.' 
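+
+  # The _plugin_classes dict below holds the registered plugin classes;
+  # plugins register themselves through RegisterPlugin(), e.g.:
+  #   esedb.EseDbParser.RegisterPlugin(MsieWebCacheEseDbPlugin)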
+ + _plugin_classes = {} + + def __init__(self): + """Initializes a parser object.""" + super(EseDbParser, self).__init__() + self._plugins = EseDbParser.GetPluginObjects() + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extracts data from an ESE database File. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. + """ + file_object = file_entry.GetFileObject() + esedb_file = pyesedb.file() + + try: + esedb_file.open_file_object(file_object) + except IOError as exception: + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + # Compare the list of available plugins. + cache = EseDbCache() + for plugin_object in self._plugins: + try: + plugin_object.Process( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + database=esedb_file, cache=cache) + + except errors.WrongPlugin: + logging.debug(( + u'[{0:s}] plugin: {1:s} cannot parse the ESE database: ' + u'{2:s}').format( + self.NAME, plugin_object.NAME, file_entry.name)) + + # TODO: explicitly clean up cache. + + esedb_file.close() + file_object.close() + + +manager.ParsersManager.RegisterParser(EseDbParser) diff --git a/plaso/parsers/esedb_plugins/__init__.py b/plaso/parsers/esedb_plugins/__init__.py new file mode 100644 index 0000000..b0ac3cc --- /dev/null +++ b/plaso/parsers/esedb_plugins/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains import statements for the ESE database plugins.""" + +from plaso.parsers.esedb_plugins import msie_webcache diff --git a/plaso/parsers/esedb_plugins/interface.py b/plaso/parsers/esedb_plugins/interface.py new file mode 100644 index 0000000..8a26599 --- /dev/null +++ b/plaso/parsers/esedb_plugins/interface.py @@ -0,0 +1,303 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the interface for ESE database plugins.""" + +import construct +import logging + +import pyesedb + +from plaso.lib import errors +from plaso.parsers import plugins + + +class EseDbPlugin(plugins.BasePlugin): + """The ESE database plugin interface.""" + + NAME = 'esedb' + + BINARY_DATA_COLUMN_TYPES = frozenset([ + pyesedb.column_types.BINARY_DATA, + pyesedb.column_types.LARGE_BINARY_DATA]) + + FLOATING_POINT_COLUMN_TYPES = frozenset([ + pyesedb.column_types.FLOAT_32BIT, + pyesedb.column_types.DOUBLE_64BIT]) + + INTEGER_COLUMN_TYPES = frozenset([ + pyesedb.column_types.CURRENCY, + pyesedb.column_types.DATE_TIME, + pyesedb.column_types.INTEGER_8BIT_UNSIGNED, + pyesedb.column_types.INTEGER_16BIT_SIGNED, + pyesedb.column_types.INTEGER_16BIT_UNSIGNED, + pyesedb.column_types.INTEGER_32BIT_SIGNED, + pyesedb.column_types.INTEGER_32BIT_UNSIGNED, + pyesedb.column_types.INTEGER_64BIT_SIGNED]) + + STRING_COLUMN_TYPES = frozenset([ + pyesedb.column_types.TEXT, + pyesedb.column_types.LARGE_TEXT]) + + _UINT64_BIG_ENDIAN = construct.UBInt64('value') + _UINT64_LITTLE_ENDIAN = construct.ULInt64('value') + + # Dictionary containing a callback method per table name. + # E.g. 'SystemIndex_0A': 'ParseSystemIndex_0A' + REQUIRED_TABLES = {} + OPTIONAL_TABLES = {} + + def __init__(self): + """Initializes the ESE database plugin.""" + super(EseDbPlugin, self).__init__() + self._required_tables = frozenset(self.REQUIRED_TABLES.keys()) + self._tables = {} + self._tables.update(self.REQUIRED_TABLES) + self._tables.update(self.OPTIONAL_TABLES) + + def _ConvertValueBinaryDataToStringAscii(self, value): + """Converts a binary data value into a string. + + Args: + value: The binary data value containing an ASCII string or None. + + Returns: + A string or None if value is None. + """ + if value: + return value.decode('ascii') + + def _ConvertValueBinaryDataToStringBase16(self, value): + """Converts a binary data value into a base-16 (hexadecimal) string. + + Args: + value: The binary data value or None. + + Returns: + A string or None if value is None. + """ + if value: + return value.encode('hex') + + def _ConvertValueBinaryDataToUBInt64(self, value): + """Converts a binary data value into an integer. + + Args: + value: The binary data value containing an unsigned 64-bit big-endian + integer. + + Returns: + An integer or None if value is None. + """ + if value: + return self._UINT64_BIG_ENDIAN.parse(value) + + def _ConvertValueBinaryDataToULInt64(self, value): + """Converts a binary data value into an integer. + + Args: + value: The binary data value containing an unsigned 64-bit little-endian + integer. + + Returns: + An integer or None if value is None. + """ + if value: + return self._UINT64_LITTLE_ENDIAN.parse(value) + + def _GetRecordValue(self, record, value_entry): + """Retrieves a specific value from the record. + + Args: + record: The ESE record object (instance of pyesedb.record). + value_entry: The value entry. + + Returns: + An object containing the value. 
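+
+      Multi-value, boolean and GUID values are currently not converted and
+      are returned as raw binary data (see the TODOs below).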
+ """ + column_type = record.get_column_type(value_entry) + value_data_flags = record.get_value_data_flags(value_entry) + + if value_data_flags & pyesedb.value_flags.MULTI_VALUE: + # TODO: implement + pass + + elif column_type == pyesedb.column_types.NULL: + return + + elif column_type == pyesedb.column_types.BOOLEAN: + # TODO: implement + pass + + elif column_type in self.INTEGER_COLUMN_TYPES: + return record.get_value_data_as_integer(value_entry) + + elif column_type in self.FLOATING_POINT_COLUMN_TYPES: + return record.get_value_data_as_floating_point(value_entry) + + elif column_type in self.STRING_COLUMN_TYPES: + return record.get_value_data_as_string(value_entry) + + elif column_type == pyesedb.column_types.GUID: + # TODO: implement + pass + + return record.get_value_data(value_entry) + + def _GetRecordValues(self, table_name, record, value_mappings=None): + """Retrieves the values from the record. + + Args: + table_name: The name of the table. + record: The ESE record object (instance of pyesedb.record). + value_mappings: Optional dict of value mappings, which map the column + name to a callback method. The default is None. + + Returns: + An dict containing the values. + """ + record_values = {} + + for value_entry in range(0, record.number_of_values): + column_name = record.get_column_name(value_entry) + if column_name in record_values: + logging.warning( + u'[{0:s}] duplicate column: {1:s} in table: {2:s}'.format( + self.NAME, column_name, table_name)) + continue + + value_callback = None + if value_mappings and column_name in value_mappings: + value_callback_method = value_mappings.get(column_name) + if value_callback_method: + value_callback = getattr(self, value_callback_method, None) + if value_callback is None: + logging.warning(( + u'[{0:s}] missing value callback method: {1:s} for column: ' + u'{2:s} in table: {3:s}').format( + self.NAME, value_callback_method, column_name, table_name)) + + value = self._GetRecordValue(record, value_entry) + if value_callback: + value = value_callback(value) + + record_values[column_name] = value + + return record_values + + def _GetTableNames(self, database): + """Retrieves the table names in a database. + + Args: + database: The ESE database object (instance of pyesedb.file). + + Returns: + A list of the table names. + """ + table_names = [] + for esedb_table in database.tables: + table_names.append(esedb_table.name) + + return table_names + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, database=None, + cache=None, **kwargs): + """Extracts event objects from the database. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + database: Optional ESE database object (instance of pyesedb.file). + The default is None. + cache: Optional cache object (instance of EseDbCache). The default is + None. + + Raises: + ValueError: If the database attribute is not valid. + """ + if database is None: + raise ValueError(u'Invalid database.') + + for table_name, callback_method in self._tables.iteritems(): + if not callback_method: + # Table names without a callback method are allowed to improve + # the detection of a database based on its table names. 
+ continue + + callback = getattr(self, callback_method, None) + if callback is None: + logging.warning( + u'[{0:s}] missing callback method: {1:s} for table: {2:s}'.format( + self.NAME, callback_method, table_name)) + continue + + esedb_table = database.get_table_by_name(table_name) + if not esedb_table: + logging.warning(u'[{0:s}] missing table: {1:s}'.format( + self.NAME, table_name)) + continue + + # The database is passed in case the database contains table names + # that are assigned dynamically and cannot be defined by + # the table name-callback mechanism. + callback( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + database=database, table=esedb_table, cache=cache, **kwargs) + + def Process( + self, parser_context, file_entry=None, parser_chain=None, database=None, + cache=None, **kwargs): + """Determines if this is the appropriate plugin for the database. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + database: Optional ESE database object (instance of pyesedb.file). + The default is None. + cache: Optional cache object (instance of EseDbCache). The default is + None. + + Raises: + errors.WrongPlugin: If the database does not contain all the tables + defined in the required_tables set. + ValueError: If the database attribute is not valid. + """ + if database is None: + raise ValueError(u'Invalid database.') + + table_names = frozenset(self._GetTableNames(database)) + if self._required_tables.difference(table_names): + raise errors.WrongPlugin( + u'[{0:s}] required tables not found.'.format(self.NAME)) + + # This will raise if unhandled keyword arguments are passed. + super(EseDbPlugin, self).Process(parser_context, **kwargs) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + self.GetEntries( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + database=database, cache=cache, **kwargs) diff --git a/plaso/parsers/esedb_plugins/msie_webcache.py b/plaso/parsers/esedb_plugins/msie_webcache.py new file mode 100644 index 0000000..040355c --- /dev/null +++ b/plaso/parsers/esedb_plugins/msie_webcache.py @@ -0,0 +1,366 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for the Microsoft Internet Explorer WebCache ESE database. + +The WebCache database (WebCacheV01.dat or WebCacheV24.dat) are used by MSIE +as of version 10. 
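+
+The plugin in this file parses the Containers, LeakFiles and Partitions
+tables, as well as the Container_# tables referenced by the Containers
+table.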
+""" + +import logging + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import esedb +from plaso.parsers.esedb_plugins import interface + + +class MsieWebCacheContainersEventObject(time_events.FiletimeEvent): + """Convenience class for a MSIE WebCache Containers table event.""" + + DATA_TYPE = 'msie:webcache:containers' + + def __init__(self, timestamp, usage, record_values): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + usage: The usage string, describing the timestamp value. + record_values: A dict object containing the record values. + """ + super(MsieWebCacheContainersEventObject, self).__init__(timestamp, usage) + + self.container_identifier = record_values.get('ContainerId', 0) + self.set_identifier = record_values.get('SetId', 0) + self.name = record_values.get('Name', u'') + self.directory = record_values.get('Directory', u'') + + +class MsieWebCacheContainerEventObject(time_events.FiletimeEvent): + """Convenience class for a MSIE WebCache Container table event.""" + + DATA_TYPE = 'msie:webcache:container' + + def __init__(self, timestamp, usage, record_values): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + usage: The usage string, describing the timestamp value. + record_values: A dict object containing the record values. + """ + super(MsieWebCacheContainerEventObject, self).__init__(timestamp, usage) + + self.entry_identifier = record_values.get(u'EntryId', 0) + self.container_identifier = record_values.get(u'ContainerId', 0) + self.cache_identifier = record_values.get(u'CacheId', 0) + + url = record_values.get(u'Url', u'') + # Ignore URL that start with a binary value. + if ord(url[0]) >= 0x20: + self.url = url + self.redirect_url = record_values.get(u'RedirectUrl', u'') + + self.access_count = record_values.get(u'AccessCount', 0) + self.sync_count = record_values.get(u'SyncCount', 0) + + self.cached_filename = record_values.get('Filename', u'') + self.file_extension = record_values.get(u'FileExtension', u'') + self.cached_file_size = record_values.get(u'FileSize', 0) + + # Ignore non-Unicode request headers values. + request_headers = record_values.get(u'RequestHeaders', u'') + if type(request_headers) == unicode and request_headers: + self.request_headers = request_headers + + # Ignore non-Unicode response headers values. + response_headers = record_values.get(u'ResponseHeaders', u'') + if type(response_headers) == unicode and response_headers: + self.response_headers = response_headers + + +class MsieWebCacheLeakFilesEventObject(time_events.FiletimeEvent): + """Convenience class for a MSIE WebCache LeakFiles table event.""" + + DATA_TYPE = 'msie:webcache:leak_file' + + def __init__(self, timestamp, usage, record_values): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + usage: The usage string, describing the timestamp value. + record_values: A dict object containing the record values. + """ + super(MsieWebCacheLeakFilesEventObject, self).__init__(timestamp, usage) + + self.leak_identifier = record_values.get('LeakId', 0) + self.cached_filename = record_values.get('Filename', u'') + + +class MsieWebCachePartitionsEventObject(time_events.FiletimeEvent): + """Convenience class for a MSIE WebCache Partitions table event.""" + + DATA_TYPE = 'msie:webcache:partitions' + + def __init__(self, timestamp, usage, record_values): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. 
+ usage: The usage string, describing the timestamp value. + record_values: A dict object containing the record values. + """ + super(MsieWebCachePartitionsEventObject, self).__init__(timestamp, usage) + + self.partition_identifier = record_values.get('PartitionId', 0) + self.partition_type = record_values.get('PartitionType', 0) + self.directory = record_values.get('Directory', u'') + self.table_identifier = record_values.get('TableId', 0) + + +class MsieWebCacheEseDbPlugin(interface.EseDbPlugin): + """Parses a MSIE WebCache ESE database file.""" + + NAME = 'msie_webcache' + DESCRIPTION = u'Parser for MSIE WebCache ESE database files.' + + # TODO: add support for AppCache_#, AppCacheEntry_#, DependencyEntry_# + + REQUIRED_TABLES = { + 'Containers': 'ParseContainersTable', + 'LeakFiles': 'ParseLeakFilesTable', + 'Partitions': 'ParsePartitionsTable'} + + _CONTAINER_TABLE_VALUE_MAPPINGS = { + 'RequestHeaders': '_ConvertValueBinaryDataToStringAscii', + 'ResponseHeaders': '_ConvertValueBinaryDataToStringAscii'} + + def _ParseContainerTable( + self, parser_context, file_entry=None, parser_chain=None, table=None, + container_name=u'Unknown'): + """Parses a Container_# table. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. + table: Optional table object (instance of pyesedb.table). + container_name: Optional string that contains the container name. + The container name indicates the table type. + The default is a string containing 'Unknown'. + """ + if table is None: + logging.warning(u'[{0:s}] invalid Container_# table'.format(self.NAME)) + return + + for esedb_record in table.records: + # TODO: add support for: + # wpnidm, iecompat, iecompatua, DNTException, DOMStore + if container_name == u'Content': + value_mappings = self._CONTAINER_TABLE_VALUE_MAPPINGS + else: + value_mappings = None + + try: + record_values = self._GetRecordValues( + table.name, esedb_record, value_mappings=value_mappings) + except UnicodeDecodeError as exception: + logging.error(( + u'[{0:s}] Unable to return record values for {1:s} with error: ' + u'{2:s}').format( + parser_chain, + file_entry.path_spec.comparable.replace(u'\n', u';'), + exception)) + continue + + if (container_name in [ + u'Content', u'Cookies', u'History', u'iedownload'] or + container_name.startswith(u'MSHist')): + timestamp = record_values.get(u'SyncTime', 0) + if timestamp: + event_object = MsieWebCacheContainerEventObject( + timestamp, u'Synchronization time', record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + timestamp = record_values.get(u'CreationTime', 0) + if timestamp: + event_object = MsieWebCacheContainerEventObject( + timestamp, eventdata.EventTimestamp.CREATION_TIME, record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + timestamp = record_values.get(u'ExpiryTime', 0) + if timestamp: + event_object = MsieWebCacheContainerEventObject( + timestamp, eventdata.EventTimestamp.EXPIRATION_TIME, + record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + timestamp = record_values.get(u'ModifiedTime', 0) + if timestamp: + event_object = MsieWebCacheContainerEventObject( + timestamp, eventdata.EventTimestamp.MODIFICATION_TIME, + record_values) + parser_context.ProduceEvent( + 
event_object, parser_chain=parser_chain, file_entry=file_entry) + + timestamp = record_values.get(u'AccessedTime', 0) + if timestamp: + event_object = MsieWebCacheContainerEventObject( + timestamp, eventdata.EventTimestamp.ACCESS_TIME, record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + timestamp = record_values.get(u'PostCheckTime', 0) + if timestamp: + event_object = MsieWebCacheContainerEventObject( + timestamp, u'Post check time', record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def ParseContainersTable( + self, parser_context, file_entry=None, parser_chain=None, database=None, + table=None, **unused_kwargs): + """Parses the Containers table. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + database: Optional database object (instance of pyesedb.file). + The default is None. + table: Optional table object (instance of pyesedb.table). + The default is None. + """ + if database is None: + logging.warning(u'[{0:s}] invalid database'.format(self.NAME)) + return + + if table is None: + logging.warning(u'[{0:s}] invalid Containers table'.format(self.NAME)) + return + + for esedb_record in table.records: + record_values = self._GetRecordValues(table.name, esedb_record) + + timestamp = record_values.get(u'LastScavengeTime', 0) + if timestamp: + event_object = MsieWebCacheContainersEventObject( + timestamp, u'Last Scavenge Time', record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + timestamp = record_values.get(u'LastAccessTime', 0) + if timestamp: + event_object = MsieWebCacheContainersEventObject( + timestamp, eventdata.EventTimestamp.ACCESS_TIME, record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + container_identifier = record_values.get(u'ContainerId', None) + container_name = record_values.get(u'Name', None) + + if not container_identifier or not container_name: + continue + + table_name = u'Container_{0:d}'.format(container_identifier) + esedb_table = database.get_table_by_name(table_name) + if not esedb_table: + logging.warning( + u'[{0:s}] missing table: {1:s}'.format(self.NAME, table_name)) + continue + + self._ParseContainerTable( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + table=esedb_table, container_name=container_name) + + def ParseLeakFilesTable( + self, parser_context, file_entry=None, parser_chain=None, database=None, + table=None, **unused_kwargs): + """Parses the LeakFiles table. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + database: Optional database object (instance of pyesedb.file). + The default is None. + table: Optional table object (instance of pyesedb.table). + The default is None. 
+ """ + if database is None: + logging.warning(u'[{0:s}] invalid database'.format(self.NAME)) + return + + if table is None: + logging.warning(u'[{0:s}] invalid LeakFiles table'.format(self.NAME)) + return + + for esedb_record in table.records: + record_values = self._GetRecordValues(table.name, esedb_record) + + timestamp = record_values.get(u'CreationTime', 0) + if timestamp: + event_object = MsieWebCacheLeakFilesEventObject( + timestamp, eventdata.EventTimestamp.CREATION_TIME, record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def ParsePartitionsTable( + self, parser_context, file_entry=None, parser_chain=None, database=None, + table=None, **unused_kwargs): + """Parses the Partitions table. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + database: Optional database object (instance of pyesedb.file). + The default is None. + table: Optional table object (instance of pyesedb.table). + The default is None. + """ + if database is None: + logging.warning(u'[{0:s}] invalid database'.format(self.NAME)) + return + + if table is None: + logging.warning(u'[{0:s}] invalid Partitions table'.format(self.NAME)) + return + + for esedb_record in table.records: + record_values = self._GetRecordValues(table.name, esedb_record) + + timestamp = record_values.get(u'LastScavengeTime', 0) + if timestamp: + event_object = MsieWebCachePartitionsEventObject( + timestamp, u'Last Scavenge Time', record_values) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +esedb.EseDbParser.RegisterPlugin(MsieWebCacheEseDbPlugin) diff --git a/plaso/parsers/esedb_plugins/msie_webcache_test.py b/plaso/parsers/esedb_plugins/msie_webcache_test.py new file mode 100644 index 0000000..5535776 --- /dev/null +++ b/plaso/parsers/esedb_plugins/msie_webcache_test.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Microsoft Internet Explorer WebCache database.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import msie_webcache as msie_webcache_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers.esedb_plugins import msie_webcache +from plaso.parsers.esedb_plugins import test_lib + + +class MsieWebCacheEseDbPluginTest(test_lib.EseDbPluginTestCase): + """Tests for the MSIE WebCache ESE database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = msie_webcache.MsieWebCacheEseDbPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['WebCacheV01.dat']) + event_queue_consumer = self._ParseEseDbFileWithPlugin( + test_file, self._plugin) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1354) + + event_object = event_objects[0] + + self.assertEquals(event_object.container_identifier, 1) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-05-12 07:30:25.486198') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.ACCESS_TIME) + + expected_msg = ( + u'Container identifier: 1 ' + u'Set identifier: 0 ' + u'Name: Content ' + u'Directory: C:\\Users\\test\\AppData\\Local\\Microsoft\\Windows\\' + u'INetCache\\IE\\ ' + u'Table: Container_1') + expected_msg_short = ( + u'Directory: C:\\Users\\test\\AppData\\Local\\Microsoft\\Windows\\' + u'INetCache\\IE\\') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/esedb_plugins/test_lib.py b/plaso/parsers/esedb_plugins/test_lib.py new file mode 100644 index 0000000..2924c07 --- /dev/null +++ b/plaso/parsers/esedb_plugins/test_lib.py @@ -0,0 +1,76 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""ESEDB plugin related functions and classes for testing.""" + +import pyesedb + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.engine import single_process +from plaso.parsers import test_lib + + +class EseDbPluginTestCase(test_lib.ParserTestCase): + """The unit test case for ESE database based plugins.""" + + def _OpenEseDbFile(self, path): + """Opens an ESE database file and returns back a pyesedb.file object. + + Args: + path: The path to the ESE database test file. 
+ """ + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=path) + file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec) + + file_object = file_entry.GetFileObject() + esedb_file = pyesedb.file() + + esedb_file.open_file_object(file_object) + + return esedb_file + + def _ParseEseDbFileWithPlugin( + self, path, plugin_object, knowledge_base_values=None): + """Parses a file as an ESE database file and returns an event generator. + + Args: + path: The path to the ESE database test file. + plugin_object: The plugin object that is used to extract an event + generator. + knowledge_base_values: optional dict containing the knowledge base + values. The default is None. + + Returns: + An event object queue consumer object (instance of + TestEventObjectQueueConsumer). + """ + event_queue = single_process.SingleProcessQueue() + event_queue_consumer = test_lib.TestEventObjectQueueConsumer(event_queue) + + parse_error_queue = single_process.SingleProcessQueue() + + parser_context = self._GetParserContext( + event_queue, parse_error_queue, + knowledge_base_values=knowledge_base_values) + esedb_file = self._OpenEseDbFile(path) + plugin_object.Process(parser_context, database=esedb_file) + + return event_queue_consumer diff --git a/plaso/parsers/filestat.py b/plaso/parsers/filestat.py new file mode 100644 index 0000000..7cfa61a --- /dev/null +++ b/plaso/parsers/filestat.py @@ -0,0 +1,129 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""File system stat object parser.""" + +from dfvfs.lib import definitions as dfvfs_definitions + +from plaso.events import time_events +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +class FileStatEvent(time_events.TimestampEvent): + """File system stat event.""" + + DATA_TYPE = 'fs:stat' + + def __init__(self, timestamp, usage, allocated, size, fs_type): + """Initializes the event. + + Args: + timestamp: The timestamp value. + usage: The usage string describing the timestamp. + allocated: Boolean value to indicate the file entry is allocated. + size: The file size in bytes. + fs_type: The filesystem this timestamp is extracted from. + """ + super(FileStatEvent, self).__init__(timestamp, usage) + + self.offset = 0 + self.size = size + self.allocated = allocated + self.fs_type = fs_type + + +class FileStatParser(interface.BaseParser): + """Class that defines a file system stat object parser.""" + + NAME = 'filestat' + DESCRIPTION = u'Parser for file system stat information.' + + _TIME_ATTRIBUTES = frozenset([ + 'atime', 'bkup_time', 'ctime', 'crtime', 'dtime', 'mtime']) + + def _GetFileSystemTypeFromFileEntry(self, file_entry): + """Return a filesystem type string from a file entry object. + + Args: + file_entry: A file entry object (instance of vfs.file_entry.FileEntry). 
+
+    Returns:
+      A string indicating the file system type.
+    """
+    file_system = file_entry.GetFileSystem()
+    type_indicator = file_system.type_indicator
+
+    if type_indicator != dfvfs_definitions.TYPE_INDICATOR_TSK:
+      return type_indicator
+
+    # TODO: Implement fs_type in dfVFS and remove this implementation
+    # once that is in place.
+    fs_info = file_system.GetFsInfo()
+    if fs_info.info:
+      type_string = unicode(fs_info.info.ftype)
+      if type_string.startswith('TSK_FS_TYPE'):
+        return type_string[12:]
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extracts event objects from a file system stat entry.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    stat_object = file_entry.GetStat()
+    if not stat_object:
+      return
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    file_system_type = self._GetFileSystemTypeFromFileEntry(file_entry)
+
+    is_allocated = getattr(stat_object, 'allocated', True)
+    file_size = getattr(stat_object, 'size', None)
+
+    for time_attribute in self._TIME_ATTRIBUTES:
+      timestamp = getattr(stat_object, time_attribute, None)
+      if timestamp is None:
+        continue
+
+      nano_time_attribute = u'{0:s}_nano'.format(time_attribute)
+      nano_time_attribute = getattr(stat_object, nano_time_attribute, None)
+
+      timestamp = timelib.Timestamp.FromPosixTime(timestamp)
+      if nano_time_attribute is not None:
+        # Note that the _nano values are in intervals of 100 nano seconds,
+        # hence the division by 10 to get microseconds.
+        timestamp += nano_time_attribute / 10
+
+      # TODO: this also ignores any timestamp that equals 0.
+      # Is this the desired behavior?
+      if not timestamp:
+        continue
+
+      event_object = FileStatEvent(
+          timestamp, time_attribute, is_allocated, file_size, file_system_type)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+manager.ParsersManager.RegisterParser(FileStatParser)
diff --git a/plaso/parsers/filestat_test.py b/plaso/parsers/filestat_test.py
new file mode 100644
index 0000000..6e42ce3
--- /dev/null
+++ b/plaso/parsers/filestat_test.py
@@ -0,0 +1,153 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for filestat parser.""" + +import unittest + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory + +# pylint: disable=unused-import +from plaso.formatters import filestat as filestat_formatter +from plaso.parsers import filestat +from plaso.parsers import test_lib + + +class FileStatTest(test_lib.ParserTestCase): + """Tests for filestat parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = filestat.FileStatParser() + + def testTSKFile(self): + """Read a file within an image file and make few tests.""" + test_file = self._GetTestFilePath([u'ímynd.dd']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + tsk_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_TSK, inode=15, location=u'/passwords.txt', + parent=os_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, tsk_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The TSK file entry has 3 event objects. + self.assertEquals(len(event_objects), 3) + + def testZipFile(self): + """Test a ZIP file.""" + test_file = self._GetTestFilePath([u'syslog.zip']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + zip_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_ZIP, location=u'/syslog', + parent=os_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, zip_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The ZIP file has 1 event object. + self.assertEquals(len(event_objects), 1) + + def testGzipFile(self): + """Test a GZIP file.""" + test_file = self._GetTestFilePath([u'syslog.gz']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + gzip_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_GZIP, parent=os_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, gzip_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The gzip file has 1 event object. + self.assertEquals(len(event_objects), 1) + + def testTarFile(self): + """Test a TAR file.""" + test_file = self._GetTestFilePath([u'syslog.tar']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + tar_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_TAR, location=u'/syslog', + parent=os_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, tar_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The tar file has 1 event object. 
+ self.assertEquals(len(event_objects), 1) + + def testNestedFile(self): + """Test a nested file.""" + test_file = self._GetTestFilePath([u'syslog.tgz']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + gzip_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_GZIP, parent=os_path_spec) + tar_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_TAR, location=u'/syslog', + parent=gzip_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, tar_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The tar file has 1 event object. + self.assertEquals(len(event_objects), 1) + + test_file = self._GetTestFilePath([u'syslog.tgz']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + gzip_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_GZIP, parent=os_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, gzip_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The gzip file has 1 event object. + self.assertEquals(len(event_objects), 1) + + def testNestedTSK(self): + """Test a nested TSK file.""" + test_file = self._GetTestFilePath([u'syslog_image.dd']) + os_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=test_file) + tsk_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_TSK, inode=11, location=u'/logs/hidden.zip', + parent=os_path_spec) + zip_path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_ZIP, location=u'/syslog', + parent=tsk_path_spec) + + event_queue_consumer = self._ParseFileByPathSpec( + self._parser, zip_path_spec) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The ZIP file has 1 event objects. + self.assertEquals(len(event_objects), 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/firefox_cache.py b/plaso/parsers/firefox_cache.py new file mode 100644 index 0000000..e8edb39 --- /dev/null +++ b/plaso/parsers/firefox_cache.py @@ -0,0 +1,246 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Implements a parser for Firefox cache files.""" + +import collections +import logging +import os + +import construct +import pyparsing + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Petter Bjelland (petter.bjelland@gmail.com)' + + +class FirefoxCacheEvent(time_events.PosixTimeEvent): + """Convenience class for a Firefox cache record event.""" + + DATA_TYPE = 'firefox:cache:record' + + def __init__(self, metadata, request_method, url, response_code): + super(FirefoxCacheEvent, self).__init__( + metadata.last_fetched, eventdata.EventTimestamp.ADDED_TIME) + + self.last_modified = metadata.last_modified + self.major = metadata.major + self.minor = metadata.minor + self.location = metadata.location + self.last_fetched = metadata.last_fetched + self.expire_time = metadata.expire_time + self.fetch_count = metadata.fetch_count + self.request_size = metadata.request_size + self.info_size = metadata.info_size + self.data_size = metadata.data_size + self.request_method = request_method + self.url = url + self.response_code = response_code + + +class FirefoxCacheParser(interface.BaseParser): + """Extract cached records from Firefox.""" + + NAME = 'firefox_cache' + DESCRIPTION = u'Parser for Firefox Cache files.' + + # Number of bytes allocated to a cache record metadata. + RECORD_HEADER_SIZE = 36 + + # Initial size of Firefox >= 4 cache files. + INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4 + + # Smallest possible block size in Firefox cache files. + MIN_BLOCK_SIZE = 256 + + RECORD_HEADER_STRUCT = construct.Struct( + 'record_header', + construct.UBInt16('major'), + construct.UBInt16('minor'), + construct.UBInt32('location'), + construct.UBInt32('fetch_count'), + construct.UBInt32('last_fetched'), + construct.UBInt32('last_modified'), + construct.UBInt32('expire_time'), + construct.UBInt32('data_size'), + construct.UBInt32('request_size'), + construct.UBInt32('info_size')) + + ALTERNATIVE_CACHE_NAME = ( + pyparsing.Word(pyparsing.hexnums, exact=5) + pyparsing.Word('m', exact=1) + + pyparsing.Word(pyparsing.nums, exact=2)) + + FIREFOX_CACHE_CONFIG = collections.namedtuple( + u'firefox_cache_config', + u'block_size first_record_offset') + + REQUEST_METHODS = [ + u'GET', 'HEAD', 'POST', 'PUT', 'DELETE', + u'TRACE', 'OPTIONS', 'CONNECT', 'PATCH'] + + def _GetFirefoxConfig(self, file_entry): + """Determine cache file block size. Raises exception if not found.""" + + if file_entry.name[0:9] != '_CACHE_00': + try: + # Match alternative filename. Five hex characters + 'm' + two digit + # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd' + # instead contain data only. + self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name) + except pyparsing.ParseException: + raise errors.UnableToParseFile(u'Not a Firefox cache file.') + + file_object = file_entry.GetFileObject() + + # There ought to be a valid record within the first 4MB. We use this + # limit to prevent reading large invalid files. + to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE) + + while file_object.get_offset() < to_read: + offset = file_object.get_offset() + + try: + # We have not yet determined the block size, so we use the smallest + # possible size. 
+ record = self.__NextRecord( + file_entry.name, file_object, self.MIN_BLOCK_SIZE) + + record_size = ( + self.RECORD_HEADER_SIZE + record.request_size + record.info_size) + + if record_size >= 4096: + # _CACHE_003_ + block_size = 4096 + elif record_size >= 1024: + # _CACHE_002_ + block_size = 1024 + else: + # _CACHE_001_ + block_size = 256 + + return self.FIREFOX_CACHE_CONFIG(block_size, offset) + + except IOError: + logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format( + self.NAME, file_entry.name, offset)) + + raise errors.UnableToParseFile( + u'Could not find a valid cache record. ' + u'Not a Firefox cache file.') + + def __Accept(self, candidate, block_size): + """Determine whether the candidate is a valid cache record.""" + + record_size = ( + self.RECORD_HEADER_SIZE + candidate.request_size+ candidate.info_size) + + return ( + candidate.request_size > 0 and candidate.fetch_count > 0 and + candidate.major == 1 and record_size // block_size < 256) + + def __NextRecord(self, filename, file_object, block_size): + """Provide the next cache record.""" + + offset = file_object.get_offset() + + try: + candidate = self.RECORD_HEADER_STRUCT.parse_stream(file_object) + except (IOError, construct.FieldError): + raise IOError(u'Unable to parse stream.') + + if not self.__Accept(candidate, block_size): + # Move reader to next candidate block. + file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR) + raise IOError(u'Not a valid Firefox cache record.') + + # The last byte in a request is null. + url = file_object.read(candidate.request_size)[:-1] + + # HTTP response header, even elements are keys, odd elements values. + headers = file_object.read(candidate.info_size) + + request_method, _, _ = ( + headers.partition('request-method\x00')[2].partition('\x00')) + + _, _, response_head = headers.partition('response-head\x00') + + response_code, _, _ = response_head.partition('\r\n') + + if request_method not in self.REQUEST_METHODS: + safe_headers = headers.decode('ascii', errors='replace') + logging.debug(( + u'[{0:s}] {1:s}:{2:d}: Unknown HTTP method \'{3:s}\'. Response ' + u'headers: \'{4:s}\'').format( + self.NAME, filename, offset, request_method, safe_headers)) + + if response_code[0:4] != 'HTTP': + safe_headers = headers.decode('ascii', errors='replace') + logging.debug(( + u'[{0:s}] {1:s}:{2:d}: Could not determine HTTP response code. ' + u'Response headers: \'{3:s}\'.').format( + self.NAME, filename, offset, safe_headers)) + + # A request can span multiple blocks, so we use modulo. + _, remainder = divmod(file_object.get_offset() - offset, block_size) + + # Move reader to next candidate block. Include the null-byte skipped above. + file_object.seek(block_size - remainder, os.SEEK_CUR) + + return FirefoxCacheEvent(candidate, request_method, url, response_code) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract records from a Firefox cache file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + firefox_config = self._GetFirefoxConfig(file_entry) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. 
+ parser_chain = self._BuildParserChain(parser_chain) + + file_object = file_entry.GetFileObject() + + file_object.seek(firefox_config.first_record_offset) + + while file_object.get_offset() < file_object.get_size(): + try: + event_object = self.__NextRecord( + file_entry.name, file_object, firefox_config.block_size) + + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + except IOError: + logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid cache record.'.format( + self.NAME, file_entry.name, + file_object.get_offset() - self.MIN_BLOCK_SIZE)) + + file_object.close() + + +manager.ParsersManager.RegisterParser(FirefoxCacheParser) diff --git a/plaso/parsers/firefox_cache_test.py b/plaso/parsers/firefox_cache_test.py new file mode 100644 index 0000000..603bb0a --- /dev/null +++ b/plaso/parsers/firefox_cache_test.py @@ -0,0 +1,188 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Firefox cache files parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import firefox_cache as firefox_cache_formatter +from plaso.lib import errors +from plaso.lib import timelib_test +from plaso.parsers import firefox_cache +from plaso.parsers import test_lib + + +__author__ = 'Petter Bjelland (petter.bjelland@gmail.com)' + + +class FirefoxCacheTest(test_lib.ParserTestCase): + """A unit test for the FirefoxCacheParser.""" + + def setUp(self): + self._parser = firefox_cache.FirefoxCacheParser() + + def VerifyMajorMinor(self, events): + """Verify that valid Firefox cahce version is extracted.""" + for event_object in events: + self.assertEquals(event_object.major, 1) + self.assertEquals(event_object.minor, 19) + + def testParseCache_InvalidFile(self): + """Verify that parser do not accept small, invalid files.""" + + test_file = self._GetTestFilePath(['firefox_cache', 'invalid_file']) + + with self.assertRaises(errors.UnableToParseFile): + _ = self._ParseFile(self._parser, test_file) + + def testParseCache_001(self): + """Test Firefox 28 cache file _CACHE_001_ parsing.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox28', '_CACHE_001_']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(574, len(event_objects)) + self.assertEquals( + event_objects[1].url, 'HTTP:http://start.ubuntu.com/12.04/sprite.png') + + self.assertEquals(event_objects[1].timestamp, + timelib_test.CopyStringToTimestamp('2014-04-21 14:13:35')) + + self.VerifyMajorMinor(event_objects) + + expected_msg = ( + u'Fetched 2 time(s) ' + u'[HTTP/1.0 200 OK] GET ' + u'"HTTP:http://start.ubuntu.com/12.04/sprite.png"') + expected_msg_short = ( + u'[HTTP/1.0 200 OK] GET ' + u'"HTTP:http://start.ubuntu.com/12.04/sprite.png"') + + self._TestGetMessageStrings( + event_objects[1], expected_msg, 
expected_msg_short) + + def testParseCache_002(self): + """Test Firefox 28 cache file _CACHE_002_ parsing.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox28', '_CACHE_002_']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(58, len(event_objects)) + self.assertEquals( + event_objects[2].url, + ('HTTP:http://www.google-analytics.com/__utm.gif?utmwv=5.5.0&utms=' + '1&utmn=1106893631&utmhn=www.dagbladet.no&utmcs=windows-1252&ut' + 'msr=1920x1080&utmvp=1430x669&utmsc=24-bit&utmul=en-us&utmje=0&' + 'utmfl=-&utmdt=Dagbladet.no%20-%20forsiden&utmhid=460894302&utm' + 'r=-&utmp=%2F&utmht=1398089458997&utmac=UA-3072159-1&utmcc=__ut' + 'ma%3D68537988.718312608.1398089459.1398089459.1398089459.1%3B%' + '2B__utmz%3D68537988.1398089459.1.1.utmcsr%3D(direct)%7Cutmccn' + '%3D(direct)%7Cutmcmd%3D(none)%3B&aip=1&utmu=qBQ~')) + + self.assertEquals(event_objects[1].timestamp, + timelib_test.CopyStringToTimestamp('2014-04-21 14:10:58')) + + self.VerifyMajorMinor(event_objects) + + def testParseCache_003(self): + """Test Firefox 28 cache file _CACHE_003_ parsing.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox28', '_CACHE_003_']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(4, len(event_objects)) + + self.assertEquals( + event_objects[3].url, + 'HTTP:https://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js') + + self.assertEquals( + event_objects[3].timestamp, + timelib_test.CopyStringToTimestamp('2014-04-21 14:11:07')) + + self.VerifyMajorMinor(event_objects) + + def testParseAlternativeFilename(self): + """Test Firefox 28 cache 003 file with alternative filename.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox28', 'E8D65m01']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(4, len(event_objects)) + + def testParseLegacyCache_001(self): + """Test Firefox 3 cache file _CACHE_001_ parsing.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox3', '_CACHE_001_']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(25, len(event_objects)) + + self.assertEquals(event_objects[0].timestamp, + timelib_test.CopyStringToTimestamp('2014-05-02 14:15:03')) + + expected_msg = ( + u'Fetched 1 time(s) ' + u'[HTTP/1.1 200 OK] GET ' + u'"HTTP:http://start.mozilla.org/en-US/"') + expected_msg_short = ( + u'[HTTP/1.1 200 OK] GET ' + u'"HTTP:http://start.mozilla.org/en-US/"') + + self._TestGetMessageStrings( + event_objects[0], expected_msg, expected_msg_short) + + def testParseLegacyCache_002(self): + """Test Firefox 3 cache file _CACHE_002_ parsing.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox3', '_CACHE_002_']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(3, len(event_objects)) + + self.assertEquals(event_objects[1].timestamp, + timelib_test.CopyStringToTimestamp('2014-05-02 14:25:55')) + + def testParseLegacyCache_003(self): + """Test Firefox 3 cache file _CACHE_003_ parsing.""" + + test_file = self._GetTestFilePath( + ['firefox_cache', 'firefox3', '_CACHE_003_']) 
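+ # Added note (an inference from the parser code, not from the original
+ # test): Firefox 3 _CACHE_00N_ files use the same 256/1024/4096 byte
+ # block sizes as the Firefox 28 files above, so the one
+ # FirefoxCacheParser covers both generations.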
+ event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(2, len(event_objects)) + + self.assertEquals(event_objects[1].timestamp, + timelib_test.CopyStringToTimestamp('2014-05-02 14:15:07')) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/hachoir.py b/plaso/parsers/hachoir.py new file mode 100644 index 0000000..e58e4bf --- /dev/null +++ b/plaso/parsers/hachoir.py @@ -0,0 +1,172 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for extracting metadata.""" +# TODO: Add a unit test for this parser. + +import datetime + +import hachoir_core.config + +# This is necessary to do PRIOR to loading up other parts of hachoir +# framework, otherwise console does not work and other "weird" behavior +# is observed. +hachoir_core.config.unicode_stdout = False +hachoir_core.config.quiet = True + +import hachoir_core +import hachoir_parser +import hachoir_metadata + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class HachoirEvent(time_events.TimestampEvent): + """Process timestamps from Hachoir Events.""" + + DATA_TYPE = 'metadata:hachoir' + + def __init__(self, dt_timestamp, usage, attributes): + """An EventObject created from a Hachoir entry. + + Args: + dt_timestamp: A python datetime.datetime object. + usage: The description of the usage of the time value. + attributes: A dict containing metadata for the event. + """ + timestamp = timelib.Timestamp.FromPythonDatetime(dt_timestamp) + super(HachoirEvent, self).__init__(timestamp, usage, self.DATA_TYPE) + self.metadata = attributes + + +class HachoirParser(interface.BaseParser): + """Parse meta data from files.""" + + NAME = 'hachoir' + DESCRIPTION = u'Parser that wraps Hachoir.' + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a file using Hachoir. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. 
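+
+ Raises:
+ UnableToParseFile: when the file cannot be parsed using Hachoir.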
+ """ + file_object = file_entry.GetFileObject() + + try: + fstream = hachoir_core.stream.InputIOStream(file_object, None, tags=[]) + except hachoir_core.error.HachoirError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + if not fstream: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, 'Not fstream')) + + try: + doc_parser = hachoir_parser.guessParser(fstream) + except hachoir_core.error.HachoirError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + if not doc_parser: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, 'Not parser')) + + try: + metadata = hachoir_metadata.extractMetadata(doc_parser) + except (AssertionError, AttributeError) as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + try: + metatext = metadata.exportPlaintext(human=False) + except AttributeError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + if not metatext: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: No metadata'.format( + self.NAME, file_entry.name)) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + attributes = {} + extracted_events = [] + for meta in metatext: + if not meta.startswith('-'): + continue + + if len(meta) < 3: + continue + + key, _, value = meta[2:].partition(': ') + + key2, _, value2 = value.partition(': ') + if key2 == 'LastPrinted' and value2 != 'False': + date_object = timelib.StringToDatetime( + value2, timezone=parser_context.timezone) + if isinstance(date_object, datetime.datetime): + extracted_events.append((date_object, key2)) + + try: + date = metadata.get(key) + if isinstance(date, datetime.datetime): + extracted_events.append((date, key)) + except ValueError: + pass + + if key in attributes: + if isinstance(attributes.get(key), list): + attributes[key].append(value) + else: + old_value = attributes.get(key) + attributes[key] = [old_value, value] + else: + attributes[key] = value + + if not extracted_events: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, 'No events discovered')) + + for date, key in extracted_events: + event_object = HachoirEvent(date, key, attributes) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +manager.ParsersManager.RegisterParser(HachoirParser) diff --git a/plaso/parsers/iis.py b/plaso/parsers/iis.py new file mode 100644 index 0000000..80ac8d8 --- /dev/null +++ b/plaso/parsers/iis.py @@ -0,0 +1,234 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for Windows IIS log files.
+
+More documentation on fields can be found here:
+http://www.microsoft.com/technet/prodtechnol/WindowsServer2003/Library/
+IIS/676400bc-8969-4aa7-851a-9319490a9bbb.mspx?mfr=true
+
+"""
+
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Ashley Holtz (ashley.a.holtz@gmail.com)'
+
+
+class IISEventObject(time_events.TimestampEvent):
+ """Convenience class to handle the IIS event object."""
+
+ DATA_TYPE = 'iis:log:line'
+
+ def __init__(self, timestamp, structure):
+ """Initializes the IIS event object.
+
+ Args:
+ timestamp: The timestamp value, as microseconds since epoch.
+ structure: The structure with any parsed log values to iterate over.
+ """
+ super(IISEventObject, self).__init__(
+ timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
+
+ for key, value in structure.iteritems():
+ if key in ('time', 'date'):
+ continue
+ if value == u'-':
+ continue
+ if type(value) is pyparsing.ParseResults:
+ setattr(self, key, u''.join(value))
+ else:
+ try:
+ save_value = int(value, 10)
+ except ValueError:
+ save_value = value
+ setattr(self, key, save_value)
+
+
+class WinIISParser(text_parser.PyparsingSingleLineTextParser):
+ """Parses a Microsoft IIS log file."""
+
+ NAME = 'winiis'
+ DESCRIPTION = u'Parser for Microsoft IIS log files.'
+
+ # Common Fields (6.0): date time s-sitename s-ip cs-method cs-uri-stem
+ # cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status
+ # sc-substatus sc-win32-status.
+ # Common Fields (7.5): date time s-ip cs-method cs-uri-stem cs-uri-query
+ # s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus
+ # sc-win32-status time-taken
+
+ # Define common structures.
+ BLANK = pyparsing.Literal(u'-')
+ WORD = pyparsing.Word(pyparsing.alphanums + u'-') | BLANK
+ INT = pyparsing.Word(pyparsing.nums, min=1) | BLANK
+ IP = (
+ text_parser.PyparsingConstants.IPV4_ADDRESS |
+ text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK)
+ PORT = pyparsing.Word(pyparsing.nums, min=1, max=6) | BLANK
+ URI = pyparsing.Word(pyparsing.alphanums + u'/.?&+;_=()-:,%') | BLANK
+
+ # Define what a log line should look like for version 6.0.
+ LOG_LINE_6_0 = (
+ text_parser.PyparsingConstants.DATE.setResultsName('date') +
+ text_parser.PyparsingConstants.TIME.setResultsName('time') +
+ WORD.setResultsName('s_sitename') + IP.setResultsName('dest_ip') +
+ WORD.setResultsName('http_method') + URI.setResultsName('cs_uri_stem') +
+ URI.setResultsName('cs_uri_query') + PORT.setResultsName('dest_port') +
+ WORD.setResultsName('cs_username') + IP.setResultsName('source_ip') +
+ URI.setResultsName('user_agent') + INT.setResultsName('sc_status') +
+ INT.setResultsName('sc_substatus') +
+ INT.setResultsName('sc_win32_status'))
+
+ _LOG_LINE_STRUCTURES = {}
+
+ # Common fields. Set results name with underscores, not hyphens because regex
+ # will not pick them up.
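+ # Illustrative example (added; the field names are taken from the table
+ # below): a header line such as '#Fields: date time c-ip cs-method
+ # sc-status' makes _ParseCommentRecord rebuild the 'logline' grammar from
+ # these structures, in the order the fields are listed.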
+ _LOG_LINE_STRUCTURES['date'] = ( + text_parser.PyparsingConstants.DATE.setResultsName('date')) + _LOG_LINE_STRUCTURES['time'] = ( + text_parser.PyparsingConstants.TIME.setResultsName('time')) + _LOG_LINE_STRUCTURES['s-sitename'] = WORD.setResultsName('s_sitename') + _LOG_LINE_STRUCTURES['s-ip'] = IP.setResultsName('dest_ip') + _LOG_LINE_STRUCTURES['cs-method'] = WORD.setResultsName('http_method') + _LOG_LINE_STRUCTURES['cs-uri-stem'] = URI.setResultsName('requested_uri_stem') + _LOG_LINE_STRUCTURES['cs-uri-query'] = URI.setResultsName('cs_uri_query') + _LOG_LINE_STRUCTURES['s-port'] = PORT.setResultsName('dest_port') + _LOG_LINE_STRUCTURES['cs-username'] = WORD.setResultsName('cs_username') + _LOG_LINE_STRUCTURES['c-ip'] = IP.setResultsName('source_ip') + _LOG_LINE_STRUCTURES['cs(User-Agent)'] = URI.setResultsName('user_agent') + _LOG_LINE_STRUCTURES['sc-status'] = INT.setResultsName('http_status') + _LOG_LINE_STRUCTURES['sc-substatus'] = INT.setResultsName('sc_substatus') + _LOG_LINE_STRUCTURES['sc-win32-status'] = ( + INT.setResultsName('sc_win32_status')) + + # Less common fields. + _LOG_LINE_STRUCTURES['s-computername'] = URI.setResultsName('s_computername') + _LOG_LINE_STRUCTURES['sc-bytes'] = INT.setResultsName('sent_bytes') + _LOG_LINE_STRUCTURES['cs-bytes'] = INT.setResultsName('received_bytes') + _LOG_LINE_STRUCTURES['time-taken'] = INT.setResultsName('time_taken') + _LOG_LINE_STRUCTURES['cs-version'] = WORD.setResultsName('protocol_version') + _LOG_LINE_STRUCTURES['cs-host'] = WORD.setResultsName('cs_host') + _LOG_LINE_STRUCTURES['cs(Cookie)'] = URI.setResultsName('cs_cookie') + _LOG_LINE_STRUCTURES['cs(Referrer)'] = URI.setResultsName('cs_referrer') + + # Define the available log line structures. Default to the IIS v. 6.0 + # common format. + LINE_STRUCTURES = [ + ('comment', text_parser.PyparsingConstants.COMMENT_LINE_HASH), + ('logline', LOG_LINE_6_0)] + + # Define a signature value for the log file. + SIGNATURE = '#Software: Microsoft Internet Information Services' + + def __init__(self): + """Initializes a parser object.""" + super(WinIISParser, self).__init__() + self.version = None + self.software = None + + def VerifyStructure(self, unused_parser_context, line): + """Verify that this file is an IIS log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + line: A single line from the text file. + + Returns: + True if this is the correct parser, False otherwise. + """ + # TODO: Examine other versions of the file format and if this parser should + # support them. For now just checking if it contains the IIS header. + if self.SIGNATURE in line: + return True + + return False + + def ParseRecord(self, unused_parser_context, key, structure): + """Parse each record structure and return an event object if applicable. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: An identification string indicating the name of the parsed + structure. + structure: A pyparsing.ParseResults object from a line in the + log file. + + Returns: + An event object (instance of EventObject) or None. 
+ """ + if key == 'comment': + self._ParseCommentRecord(structure) + elif key == 'logline': + return self._ParseLogLine(structure) + else: + logging.warning( + u'Unable to parse record, unknown structure: {0:s}'.format(key)) + + def _ParseCommentRecord(self, structure): + """Parse a comment and store appropriate attributes.""" + comment = structure[1] + if comment.startswith(u'Version'): + _, _, self.version = comment.partition(u':') + elif comment.startswith(u'Software'): + _, _, self.software = comment.partition(u':') + elif comment.startswith(u'Date'): + # TODO: fix this date is not used here. + _, _, unused_date = comment.partition(u':') + + # Check if there's a Fields line. If not, LOG_LINE defaults to IIS 6.0 + # common format. + elif comment.startswith(u'Fields'): + log_line = pyparsing.Empty() + for member in comment[7:].split(): + log_line += self._LOG_LINE_STRUCTURES.get(member, self.URI) + # TODO: self._line_structures is a work-around and this needs + # a structural fix. + self._line_structures[1] = ('logline', log_line) + + def _ParseLogLine(self, structure): + """Parse a single log line and return an EventObject.""" + date = structure.get('date', None) + time = structure.get('time', None) + + if not (date and time): + logging.warning(( + u'Unable to extract timestamp from IIS log line with structure: ' + u'{0:s}.').format(structure)) + return + + year, month, day = date + hour, minute, second = time + + timestamp = timelib.Timestamp.FromTimeParts( + year, month, day, hour, minute, second) + + if not timestamp: + return + + return IISEventObject(timestamp, structure) + + +manager.ParsersManager.RegisterParser(WinIISParser) diff --git a/plaso/parsers/iis_test.py b/plaso/parsers/iis_test.py new file mode 100644 index 0000000..1ef1ed3 --- /dev/null +++ b/plaso/parsers/iis_test.py @@ -0,0 +1,96 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Windows IIS log parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import iis as iis_formatter +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import iis + + +__author__ = 'Ashley Holtz (ashley.a.holtz@gmail.com)' + + +class WinIISUnitTest(test_lib.ParserTestCase): + """Tests for the Windows IIS parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = iis.WinIISParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['iis.log']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 11) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-30 00:00:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.source_ip, u'10.10.10.100') + self.assertEquals(event_object.dest_ip, u'10.10.10.100') + self.assertEquals(event_object.dest_port, 80) + + expected_msg = ( + u'GET /some/image/path/something.jpg ' + u'[ 10.10.10.100 > 10.10.10.100 : 80 ] ' + u'Http Status: 200 ' + u'User Agent: Mozilla/4.0+(compatible;+Win32;' + u'+WinHttp.WinHttpRequest.5)') + expected_msg_short = ( + u'GET /some/image/path/something.jpg ' + u'[ 10.10.10.100 > 10.10.10.100 : 80 ]') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[5] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-30 00:00:05') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.http_method, 'GET') + self.assertEquals(event_object.http_status, 200) + self.assertEquals( + event_object.requested_uri_stem, u'/some/image/path/something.jpg') + + event_object = event_objects[1] + + expected_msg = ( + u'GET /some/image/path/something.htm ' + u'[ 22.22.22.200 > 10.10.10.100 : 80 ] ' + u'Http Status: 404 ' + u'User Agent: Mozilla/5.0+(Macintosh;+Intel+Mac+OS+X+10_6_8)' + u'+AppleWebKit/534.57.2+(KHTML,+like+Gecko)+Version/5.1.7' + u'+Safari/534.57.2') + expected_msg_short = ( + u'GET /some/image/path/something.htm ' + u'[ 22.22.22.200 > 10.10.10.100 : 80 ]') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/interface.py b/plaso/parsers/interface.py new file mode 100644 index 0000000..e70a5ea --- /dev/null +++ b/plaso/parsers/interface.py @@ -0,0 +1,251 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a class to provide a parsing framework to plaso. 
+
+This class contains a base framework class for parsing file objects, and
+also some implementations that extend it to provide a more comprehensive
+parser.
+"""
+
+import abc
+
+from plaso.parsers import manager
+
+
+class BaseParser(object):
+ """Class that implements the parser object interface."""
+
+ NAME = 'base_parser'
+ DESCRIPTION = u''
+
+ def _BuildParserChain(self, parser_chain=None):
+ """Return the parser chain with the addition of the current parser.
+
+ Args:
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+
+ Returns:
+ The parser chain, with the addition of the current parser.
+ """
+ if not parser_chain:
+ return self.NAME
+
+ return u'/'.join([parser_chain, self.NAME])
+
+ @abc.abstractmethod
+ def Parse(self, parser_context, file_entry, parser_chain=None):
+ """Parses the file entry and extracts event objects.
+
+ This is the main function of the class, the one that actually
+ goes through the log file and parses each line of it to
+ produce a parsed line and a timestamp.
+
+ It also tries to verify the file structure and see if the class is capable
+ of parsing the file passed to the module. It will do so with a series of
+ tests that should determine if the file is of the correct structure.
+
+ If the class is not capable of parsing the file passed to it, an exception
+ of the type UnableToParseFile should be raised, indicating the reason why
+ the class cannot parse it.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_entry: A file entry object (instance of dfvfs.FileEntry).
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+
+ Raises:
+ NotImplementedError when not implemented.
+ """
+ raise NotImplementedError
+
+ @classmethod
+ def SupportsPlugins(cls):
+ """Determines if a parser supports plugins.
+
+ Returns:
+ A boolean value indicating whether the parser supports plugins.
+ """
+ return False
+
+
+class BasePluginsParser(BaseParser):
+ """Class that implements the parser with plugins object interface."""
+
+ NAME = 'base_plugin_parser'
+ DESCRIPTION = u''
+
+ # Every child class should define its own _plugin_classes dict.
+ # We don't define it here to make sure the plugins of different
+ # classes don't end up in the same dict.
+ # _plugin_classes = {}
+ _plugin_classes = None
+
+ @classmethod
+ def DeregisterPlugin(cls, plugin_class):
+ """Deregisters a plugin class.
+
+ The plugin classes are identified based on their lower case name.
+
+ Args:
+ plugin_class: the class object of the plugin.
+
+ Raises:
+ KeyError: if plugin class is not set for the corresponding name.
+ """
+ plugin_name = plugin_class.NAME.lower()
+ if plugin_name not in cls._plugin_classes:
+ raise KeyError(
+ u'Plugin class not set for name: {0:s}.'.format(
+ plugin_class.NAME))
+
+ del cls._plugin_classes[plugin_name]
+
+ @classmethod
+ def GetPluginNames(cls, parser_filter_string=None):
+ """Retrieves the plugin names.
+
+ Args:
+ parser_filter_string: Optional parser filter string. The default is None.
+
+ Returns:
+ A list of plugin names.
+ """
+ plugin_names = []
+
+ for plugin_name, _ in cls.GetPlugins(
+ parser_filter_string=parser_filter_string):
+ plugin_names.append(plugin_name)
+
+ return plugin_names
+
+ @classmethod
+ def GetPluginObjects(cls, parser_filter_string=None):
+ """Retrieves the plugin objects.
+
+ Args:
+ parser_filter_string: Optional parser filter string. The default is None.
+
+ Returns:
+ A list of plugin objects (instances of BasePlugin).
+ """
+ plugin_objects = []
+
+ for _, plugin_class in cls.GetPlugins(
+ parser_filter_string=parser_filter_string):
+ plugin_object = plugin_class()
+ plugin_objects.append(plugin_object)
+
+ return plugin_objects
+
+ @classmethod
+ def GetPlugins(cls, parser_filter_string=None):
+ """Retrieves the registered plugins.
+
+ Args:
+ parser_filter_string: Optional parser filter string. The default is None.
+
+ Yields:
+ A tuple that contains the uniquely identifying name of the plugin
+ and the plugin class (subclass of BasePlugin).
+ """
+ if parser_filter_string:
+ includes, excludes = manager.ParsersManager.GetFilterListsFromString(
+ parser_filter_string)
+ else:
+ includes = None
+ excludes = None
+
+ for plugin_name, plugin_class in cls._plugin_classes.iteritems():
+ if excludes and plugin_name in excludes:
+ continue
+
+ if includes and plugin_name not in includes:
+ continue
+
+ yield plugin_name, plugin_class
+
+ @abc.abstractmethod
+ def Parse(self, parser_context, file_entry, parser_chain=None):
+ """Parses the file entry and extracts event objects.
+
+ This is the main function of the class, the one that actually
+ goes through the log file and parses each line of it to
+ produce a parsed line and a timestamp.
+
+ It also tries to verify the file structure and see if the class is capable
+ of parsing the file passed to the module. It will do so with a series of
+ tests that should determine if the file is of the correct structure.
+
+ If the class is not capable of parsing the file passed to it, an exception
+ of the type UnableToParseFile should be raised, indicating the reason why
+ the class cannot parse it.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_entry: A file entry object (instance of dfvfs.FileEntry).
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+
+ Raises:
+ NotImplementedError when not implemented.
+ """
+ raise NotImplementedError
+
+ @classmethod
+ def RegisterPlugin(cls, plugin_class):
+ """Registers a plugin class.
+
+ The plugin classes are identified based on their lower case name.
+
+ Args:
+ plugin_class: the class object of the plugin.
+
+ Raises:
+ KeyError: if plugin class is already set for the corresponding name.
+ """
+ plugin_name = plugin_class.NAME.lower()
+ if plugin_name in cls._plugin_classes:
+ raise KeyError((
+ u'Plugin class already set for name: {0:s}.').format(
+ plugin_class.NAME))
+
+ cls._plugin_classes[plugin_name] = plugin_class
+
+ @classmethod
+ def RegisterPlugins(cls, plugin_classes):
+ """Registers plugin classes.
+
+ Args:
+ plugin_classes: a list of class objects of the plugins.
+
+ Raises:
+ KeyError: if plugin class is already set for the corresponding name.
+ """
+ for plugin_class in plugin_classes:
+ cls.RegisterPlugin(plugin_class)
+
+ @classmethod
+ def SupportsPlugins(cls):
+ """Determines if a parser supports plugins.
+
+ Returns:
+ A boolean value indicating whether the parser supports plugins.
+ """
+ return True
diff --git a/plaso/parsers/java_idx.py b/plaso/parsers/java_idx.py
new file mode 100644
index 0000000..f3f754a
--- /dev/null
+++ b/plaso/parsers/java_idx.py
@@ -0,0 +1,236 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Java Cache IDX files.""" + +# TODO: +# * 6.02 files did not retain IP addresses. However, the +# deploy_resource_codebase header field may contain the host IP. +# This needs to be researched further, as that field may not always +# be present. 6.02 files will currently return 'Unknown'. +import construct + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +class JavaIDXEvent(time_events.TimestampEvent): + """Convenience class for a Java IDX cache file download event.""" + + DATA_TYPE = 'java:download:idx' + + def __init__( + self, timestamp, timestamp_description, idx_version, url, ip_address): + """Initializes the event object. + + Args: + timestamp: The timestamp value. + timestamp_description: The description of the usage of the time value. + idx_version: Version of IDX file. + url: URL of the downloaded file. + ip_address: IP address of the host in the URL. + """ + super(JavaIDXEvent, self).__init__(timestamp, timestamp_description) + self.idx_version = idx_version + self.url = url + self.ip_address = ip_address + + +class JavaIDXParser(interface.BaseParser): + """Parse Java IDX files for download events. + + There are five structures defined. 6.02 files had one generic section + that retained all data. From 6.03, the file went to a multi-section + format where later sections were optional and had variable-lengths. + 6.03, 6.04, and 6.05 files all have their main data section (#2) + begin at offset 128. The short structure is because 6.05 files + deviate after the 8th byte. So, grab the first 8 bytes to ensure it's + valid, get the file version, then continue on with the correct + structures. + """ + + NAME = 'java_idx' + DESCRIPTION = u'Parser for Java IDX files.' 
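+
+ # Illustrative note (added; the byte string is an example, not from the
+ # original code): the short header below is six bytes, so
+ # IDX_SHORT_STRUCT.parse('\x00\x00\x00\x00\x02\x5d') would yield
+ # busy = 0, incomplete = 0 and idx_version = 605 (0x25d).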
+ + IDX_SHORT_STRUCT = construct.Struct( + 'magic', + construct.UBInt8('busy'), + construct.UBInt8('incomplete'), + construct.UBInt32('idx_version')) + + IDX_602_STRUCT = construct.Struct( + 'IDX_602_Full', + construct.UBInt16('null_space'), + construct.UBInt8('shortcut'), + construct.UBInt32('content_length'), + construct.UBInt64('last_modified_date'), + construct.UBInt64('expiration_date'), + construct.PascalString( + 'version_string', length_field=construct.UBInt16('length')), + construct.PascalString( + 'url', length_field=construct.UBInt16('length')), + construct.PascalString( + 'namespace', length_field=construct.UBInt16('length')), + construct.UBInt32('FieldCount')) + + IDX_605_SECTION_ONE_STRUCT = construct.Struct( + 'IDX_605_Section1', + construct.UBInt8('shortcut'), + construct.UBInt32('content_length'), + construct.UBInt64('last_modified_date'), + construct.UBInt64('expiration_date'), + construct.UBInt64('validation_date'), + construct.UBInt8('signed'), + construct.UBInt32('sec2len'), + construct.UBInt32('sec3len'), + construct.UBInt32('sec4len')) + + IDX_605_SECTION_TWO_STRUCT = construct.Struct( + 'IDX_605_Section2', + construct.PascalString( + 'version', length_field=construct.UBInt16('length')), + construct.PascalString( + 'url', length_field=construct.UBInt16('length')), + construct.PascalString( + 'namespec', length_field=construct.UBInt16('length')), + construct.PascalString( + 'ip_address', length_field=construct.UBInt16('length')), + construct.UBInt32('FieldCount')) + + # Java uses Pascal-style strings, but with a 2-byte length field. + JAVA_READUTF_STRING = construct.Struct( + 'Java.ReadUTF', + construct.PascalString( + 'string', length_field=construct.UBInt16('length'))) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a Java cache IDX file. + + This is the main parsing engine for the parser. It determines if + the selected file is a proper IDX file. It then checks the file + version to determine the correct structure to apply to extract + data. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + try: + magic = self.IDX_SHORT_STRUCT.parse_stream(file_object) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile( + u'Unable to parse Java IDX file with error: {0:s}.'.format(exception)) + + # Fields magic.busy and magic.incomplete are normally 0x00. They + # are set to 0x01 if the file is currently being downloaded. Logic + # checks for > 1 to avoid a race condition and still reject any + # file with other data. + # Field magic.idx_version is the file version, of which only + # certain versions are supported. + if magic.busy > 1 or magic.incomplete > 1: + raise errors.UnableToParseFile(u'Not a valid Java IDX file') + + if not magic.idx_version in [602, 603, 604, 605]: + raise errors.UnableToParseFile(u'Not a valid Java IDX file') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + # Obtain the relevant values from the file. The last modified date + # denotes when the file was last modified on the HOST. For example, + # when the file was uploaded to a web server. 
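+ # Added note: last_modified_date is a Java timestamp, i.e. milliseconds
+ # since 1970-01-01 00:00:00 UTC; timelib.Timestamp.FromJavaTime converts
+ # it to plaso's microsecond timestamps further below.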
+ if magic.idx_version == 602: + section_one = self.IDX_602_STRUCT.parse_stream(file_object) + last_modified_date = section_one.last_modified_date + url = section_one.url + ip_address = 'Unknown' + http_header_count = section_one.FieldCount + elif magic.idx_version in [603, 604, 605]: + + # IDX 6.03 and 6.04 have two unused bytes before the structure. + if magic.idx_version in [603, 604]: + file_object.read(2) + + # IDX 6.03, 6.04, and 6.05 files use the same structures for the + # remaining data. + section_one = self.IDX_605_SECTION_ONE_STRUCT.parse_stream(file_object) + last_modified_date = section_one.last_modified_date + if file_object.get_size() > 128: + file_object.seek(128) # Static offset for section 2. + section_two = self.IDX_605_SECTION_TWO_STRUCT.parse_stream(file_object) + url = section_two.url + ip_address = section_two.ip_address + http_header_count = section_two.FieldCount + else: + url = 'Unknown' + ip_address = 'Unknown' + http_header_count = 0 + + # File offset is now just prior to HTTP headers. Make sure there + # are headers, and then parse them to retrieve the download date. + download_date = None + for field in range(0, http_header_count): + field = self.JAVA_READUTF_STRING.parse_stream(file_object) + value = self.JAVA_READUTF_STRING.parse_stream(file_object) + if field.string == 'date': + # Time string "should" be in UTC or have an associated time zone + # information in the string itself. If that is not the case then + # there is no reliable method for plaso to determine the proper + # timezone, so the assumption is that it is UTC. + download_date = timelib.Timestamp.FromTimeString( + value.string, gmt_as_timezone=False) + + if not url or not ip_address: + raise errors.UnableToParseFile( + u'Unexpected Error: URL or IP address not found in file.') + + last_modified_timestamp = timelib.Timestamp.FromJavaTime( + last_modified_date) + # TODO: Move the timestamp description fields into eventdata. + event_object = JavaIDXEvent( + last_modified_timestamp, 'File Hosted Date', magic.idx_version, url, + ip_address) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if section_one: + expiration_date = section_one.get('expiration_date', None) + if expiration_date: + expiration_timestamp = timelib.Timestamp.FromJavaTime(expiration_date) + event_object = JavaIDXEvent( + expiration_timestamp, 'File Expiration Date', magic.idx_version, + url, ip_address) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if download_date: + event_object = JavaIDXEvent( + download_date, eventdata.EventTimestamp.FILE_DOWNLOADED, + magic.idx_version, url, ip_address) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +manager.ParsersManager.RegisterParser(JavaIDXParser) diff --git a/plaso/parsers/java_idx_test.py b/plaso/parsers/java_idx_test.py new file mode 100644 index 0000000..0229a01 --- /dev/null +++ b/plaso/parsers/java_idx_test.py @@ -0,0 +1,124 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for Java Cache IDX file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import java_idx as java_idx_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import java_idx +from plaso.parsers import test_lib + + +class IDXTest(test_lib.ParserTestCase): + """Tests for Java Cache IDX file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = java_idx.JavaIDXParser() + + def testParse602(self): + """Tests the Parse function on a version 602 IDX file.""" + test_file = self._GetTestFilePath(['java_602.idx']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + idx_version_expected = 602 + self.assertEqual(event_object.idx_version, idx_version_expected) + + ip_address_expected = u'Unknown' + self.assertEqual(event_object.ip_address, ip_address_expected) + + url_expected = u'http://www.gxxxxx.com/a/java/xxz.jar' + self.assertEqual(event_object.url, url_expected) + + description_expected = u'File Hosted Date' + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-05-05 01:34:19.720') + self.assertEqual( + event_object.timestamp, expected_timestamp) + + # Parse second event. Same metadata; different timestamp event. + event_object = event_objects[1] + + self.assertEqual(event_object.idx_version, idx_version_expected) + self.assertEqual(event_object.ip_address, ip_address_expected) + self.assertEqual(event_object.url, url_expected) + + description_expected = eventdata.EventTimestamp.FILE_DOWNLOADED + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-05-05 03:52:31') + self.assertEqual(event_object.timestamp, expected_timestamp) + + def testParse605(self): + """Tests the Parse function on a version 605 IDX file.""" + test_file = self._GetTestFilePath(['java.idx']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + idx_version_expected = 605 + self.assertEqual(event_object.idx_version, idx_version_expected) + + ip_address_expected = '10.7.119.10' + self.assertEqual(event_object.ip_address, ip_address_expected) + + url_expected = ( + u'http://xxxxc146d3.gxhjxxwsf.xx:82/forum/dare.php?' + u'hsh=6&key=b30xxxx1c597xxxx15d593d3f0xxx1ab') + self.assertEqual(event_object.url, url_expected) + + description_expected = 'File Hosted Date' + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2001-07-26 05:00:00' + ) + self.assertEqual(event_object.timestamp, expected_timestamp) + + # Parse second event. Same metadata; different timestamp event. 
+ event_object = event_objects[1]
+
+ self.assertEqual(event_object.idx_version, idx_version_expected)
+ self.assertEqual(event_object.ip_address, ip_address_expected)
+ self.assertEqual(event_object.url, url_expected)
+
+ description_expected = eventdata.EventTimestamp.FILE_DOWNLOADED
+ self.assertEqual(event_object.timestamp_desc, description_expected)
+
+ expected_timestamp = timelib_test.CopyStringToTimestamp(
+ '2013-01-13 16:22:01')
+ self.assertEqual(event_object.timestamp, expected_timestamp)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/plaso/parsers/mac_appfirewall.py b/plaso/parsers/mac_appfirewall.py
new file mode 100644
index 0000000..291e0b3
--- /dev/null
+++ b/plaso/parsers/mac_appfirewall.py
@@ -0,0 +1,257 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains an appfirewall.log (Mac OS X Firewall) parser."""
+
+import datetime
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)'
+
+
+class MacAppFirewallLogEvent(time_events.TimestampEvent):
+ """Convenience class for a Mac AppFirewall log line event."""
+
+ DATA_TYPE = 'mac:asl:appfirewall:line'
+
+ def __init__(self, timestamp, structure, process_name, action):
+ """Initializes the event object.
+
+ Args:
+ timestamp: The timestamp time value, epoch.
+ structure: Structure with the parsed log fields; its computer_name,
+ agent and status values are stored on the event.
+ process_name: String with the name of the entity that tried to do the
+ action.
+ action: String with the action.
+ """
+ super(MacAppFirewallLogEvent, self).__init__(
+ timestamp, eventdata.EventTimestamp.ADDED_TIME)
+ self.timestamp = timestamp
+ self.computer_name = structure.computer_name
+ self.agent = structure.agent
+ self.status = structure.status
+ self.process_name = process_name
+ self.action = action
+
+
+class MacAppFirewallParser(text_parser.PyparsingSingleLineTextParser):
+ """Parses text based on an appfirewall.log file."""
+
+ NAME = 'mac_appfirewall_log'
+ DESCRIPTION = u'Parser for appfirewall.log files.'
+
+ ENCODING = u'utf-8'
+
+ # Regular expressions for known actions.
+
+ # Define what a log line should look like.
+ # Example: 'Nov 2 04:07:35 DarkTemplar-2.local socketfilterfw[112] '
+ # '<Info>: Dropbox: Allow (in:0 out:2)'
+ # INFO: process_name is going to have a white space at the beginning.
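+ # Illustrative mapping (added comment; values taken from the example line
+ # above and the parser tests): computer_name = 'DarkTemplar-2.local',
+ # agent = 'socketfilterfw[112]', status = 'Info',
+ # process_name = ' Dropbox' (note the leading white space) and
+ # action = 'Allow (in:0 out:2)'.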
+ FIREWALL_LINE = (
+ text_parser.PyparsingConstants.MONTH.setResultsName('month') +
+ text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') +
+ text_parser.PyparsingConstants.TIME.setResultsName('time') +
+ pyparsing.Word(pyparsing.printables).setResultsName('computer_name') +
+ pyparsing.Word(pyparsing.printables).setResultsName('agent') +
+ pyparsing.Literal(u'<').suppress() +
+ pyparsing.CharsNotIn(u'>').setResultsName('status') +
+ pyparsing.Literal(u'>:').suppress() +
+ pyparsing.CharsNotIn(u':').setResultsName('process_name') +
+ pyparsing.Literal(u':') +
+ pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('action'))
+
+ # Repeated line.
+ # Example: Nov 29 22:18:29 --- last message repeated 1 time ---
+ REPEATED_LINE = (
+ text_parser.PyparsingConstants.MONTH.setResultsName('month') +
+ text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') +
+ text_parser.PyparsingConstants.TIME.setResultsName('time') +
+ pyparsing.Literal(u'---').suppress() +
+ pyparsing.CharsNotIn(u'---').setResultsName('process_name') +
+ pyparsing.Literal(u'---').suppress())
+
+ # Define the available log line structures.
+ LINE_STRUCTURES = [
+ ('logline', FIREWALL_LINE),
+ ('repeated', REPEATED_LINE)]
+
+ def __init__(self):
+ """Initializes a parser object."""
+ super(MacAppFirewallParser, self).__init__()
+ self._year_use = 0
+ self._last_month = None
+ self.previous_structure = None
+
+ def VerifyStructure(self, parser_context, line):
+ """Verify that this file is a Mac AppFirewall log file.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ line: A single line from the text file.
+
+ Returns:
+ True if this is the correct parser, False otherwise.
+ """
+ try:
+ line = self.FIREWALL_LINE.parseString(line)
+ except pyparsing.ParseException:
+ logging.debug(u'Not a Mac AppFirewall log file')
+ return False
+ if (line.action != 'creating /var/log/appfirewall.log' or
+ line.status != 'Error'):
+ return False
+ return True
+
+ def ParseRecord(self, parser_context, key, structure):
+ """Parses each record structure and returns an event object if applicable.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ key: An identification string indicating the name of the parsed
+ structure.
+ structure: A pyparsing.ParseResults object from a line in the
+ log file.
+
+ Returns:
+ An event object (instance of EventObject) or None.
+ """
+ if key == 'logline' or key == 'repeated':
+ return self._ParseLogLine(parser_context, structure, key)
+ else:
+ logging.warning(
+ u'Unable to parse record, unknown structure: {0:s}'.format(key))
+
+ def _ParseLogLine(self, parser_context, structure, key):
+ """Parse a logline and store appropriate attributes.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ structure: Structure with the parsed log line fields.
+ key: Type of log line (normal or repeated).
+
+ Returns:
+ A MacAppFirewallLogEvent object or None.
+ """
+ # TODO: improve this to get a valid year.
+ if not self._year_use:
+ self._year_use = parser_context.year
+
+ if not self._year_use:
+ # Get from the creation time of the file.
+ self._year_use = self._GetYear(
+ self.file_entry.GetStat(), parser_context.timezone)
+ # If that fails, get from the current time.
+ if not self._year_use:
+ self._year_use = timelib.GetCurrentYear()
+
+ # Gap detected between years.
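+ # Added note: appfirewall.log lines do not carry a year, so when the month
+ # value decreases compared to the previous line we assume a new year has
+ # started and increment the running year.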
+ month = timelib.MONTH_DICT.get(structure.month.lower())
+ if not self._last_month:
+ self._last_month = month
+ if month < self._last_month:
+ self._year_use += 1
+ timestamp = self._GetTimestamp(
+ structure.day,
+ month,
+ self._year_use,
+ structure.time)
+ if not timestamp:
+ logging.debug(u'Invalid timestamp in log line: {0!s}.'.format(structure))
+ return
+ self._last_month = month
+
+ # If the current entry is a repeated entry, we take the basic information
+ # from the previous entry, but use the timestamp from the current entry.
+ if key == 'logline':
+ self.previous_structure = structure
+ else:
+ structure = self.previous_structure
+
+ # Pyparsing reads in RAW, but the text is in UTF8.
+ try:
+ action = structure.action.decode('utf-8')
+ except UnicodeDecodeError:
+ logging.warning(
+ u'Decode UTF8 failed, the message string may be cut short.')
+ action = structure.action.decode('utf-8', 'ignore')
+ # Due to the use of CharsNotIn pyparsing structure contains whitespaces
+ # that need to be removed.
+ process_name = structure.process_name.strip()
+
+ event_object = MacAppFirewallLogEvent(
+ timestamp, structure, process_name, action)
+ return event_object
+
+ def _GetTimestamp(self, day, month, year, time):
+ """Gets a timestamp from pyparsing ParseResults timestamp values.
+
+ These are the values as returned by using the
+ text_parser.PyparsingConstants structures, for example:
+ 08, Nov, [20, 36, 37]
+
+ Args:
+ day: An integer representing the day.
+ month: An integer representing the month.
+ year: An integer representing the year.
+ time: A list containing the hour, minute and second values.
+
+ Returns:
+ A plaso timelib timestamp or 0 on error.
+ """
+ try:
+ hour, minute, second = time
+ timestamp = timelib.Timestamp.FromTimeParts(
+ year, month, day, hour, minute, second)
+ except ValueError:
+ timestamp = 0
+ return timestamp
+
+ def _GetYear(self, stat, timezone):
+ """Retrieves the year either from the input file or from the settings."""
+ time = getattr(stat, 'crtime', 0)
+ if not time:
+ time = getattr(stat, 'ctime', 0)
+
+ if not time:
+ logging.error(
+ u'Unable to determine correct year of log file, defaulting to '
+ u'current year.')
+ return timelib.GetCurrentYear()
+
+ try:
+ timestamp = datetime.datetime.fromtimestamp(time, timezone)
+ except ValueError as exception:
+ logging.error((
+ u'Unable to determine correct year of log file with error: {0:s}, '
+ u'defaulting to current year.').format(exception))
+ return timelib.GetCurrentYear()
+ return timestamp.year
+
+
+manager.ParsersManager.RegisterParser(MacAppFirewallParser)
diff --git a/plaso/parsers/mac_appfirewall_test.py b/plaso/parsers/mac_appfirewall_test.py
new file mode 100644
index 0000000..54d4456
--- /dev/null
+++ b/plaso/parsers/mac_appfirewall_test.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for Mac AppFirewall log file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import mac_appfirewall as mac_appfirewall_formatter +from plaso.lib import timelib_test +from plaso.parsers import mac_appfirewall +from plaso.parsers import test_lib + + +class MacAppFirewallUnitTest(test_lib.ParserTestCase): + """Tests for Mac AppFirewall log file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = mac_appfirewall.MacAppFirewallParser() + + def testParseFile(self): + """Test parsing of a Mac Wifi log file.""" + knowledge_base_values = {'year': 2013} + test_file = self._GetTestFilePath(['appfirewall.log']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 47) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-02 04:07:35') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.agent, u'socketfilterfw[112]') + self.assertEqual(event_object.computer_name, u'DarkTemplar-2.local') + self.assertEqual(event_object.status, u'Error') + self.assertEqual(event_object.process_name, u'Logging') + self.assertEqual(event_object.action, u'creating /var/log/appfirewall.log') + + expected_msg = ( + u'Computer: DarkTemplar-2.local ' + u'Agent: socketfilterfw[112] ' + u'Status: Error ' + u'Process name: Logging ' + u'Log: creating /var/log/appfirewall.log') + expected_msg_short = ( + u'Process name: Logging ' + u'Status: Error') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[9] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-03 13:25:15') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.agent, u'socketfilterfw[87]') + self.assertEqual(event_object.computer_name, u'DarkTemplar-2.local') + self.assertEqual(event_object.status, u'Info') + self.assertEqual(event_object.process_name, u'Dropbox') + self.assertEqual(event_object.action, u'Allow TCP LISTEN (in:0 out:1)') + + expected_msg = ( + u'Computer: DarkTemplar-2.local ' + u'Agent: socketfilterfw[87] ' + u'Status: Info ' + u'Process name: Dropbox ' + u'Log: Allow TCP LISTEN (in:0 out:1)') + expected_msg_short = ( + u'Process name: Dropbox ' + u'Status: Info') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # Check repeated lines. + event_object = event_objects[38] + repeated_event_object = event_objects[39] + self.assertEqual(event_object.agent, repeated_event_object.agent) + self.assertEqual( + event_object.computer_name, repeated_event_object.computer_name) + self.assertEqual(event_object.status, repeated_event_object.status) + self.assertEqual( + event_object.process_name, repeated_event_object.process_name) + self.assertEqual(event_object.action, repeated_event_object.action) + + # Year changes. 
+ event_object = event_objects[45]
+ expected_timestamp = timelib_test.CopyStringToTimestamp(
+ '2013-12-31 23:59:23')
+ self.assertEqual(event_object.timestamp, expected_timestamp)
+
+ event_object = event_objects[46]
+ expected_timestamp = timelib_test.CopyStringToTimestamp(
+ '2014-01-01 01:13:23')
+ self.assertEqual(event_object.timestamp, expected_timestamp)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/plaso/parsers/mac_keychain.py b/plaso/parsers/mac_keychain.py
new file mode 100644
index 0000000..d875997
--- /dev/null
+++ b/plaso/parsers/mac_keychain.py
@@ -0,0 +1,507 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for Mac OS X Keychain files."""
+
+# INFO: Only internet and application passwords are supported, because
+# they are the only data that contains timestamp events. Keychain can
+# also store "secret notes"; these notes are stored in the same format as
+# application passwords, so they are already supported. Stored Wi-Fi
+# passwords are application passwords as well.
+
+# TODO: the AccessControl for each entry has not been implemented. So far,
+# we know that the AccessControl for internet and application passwords is
+# stored in other tables (symmetric keys, certificates, etc.). Access
+# Control indicates which specific tool, or all tools, is able to use an
+# entry.
+
+
+import binascii
+import construct
+import logging
+import os
+
+from plaso.lib import errors
+from plaso.lib import event
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import interface
+from plaso.parsers import manager
+
+
+__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)'
+
+
+class KeychainInternetRecordEvent(event.EventObject):
+ """Convenience class for a keychain internet record event."""
+
+ DATA_TYPE = 'mac:keychain:internet'
+
+ def __init__(
+ self, timestamp, timestamp_desc, entry_name, account_name,
+ text_description, comments, where, protocol, type_protocol, ssgp_hash):
+ """Initializes the event object.
+
+ Args:
+ timestamp: The timestamp value.
+ timestamp_desc: The description of the usage of the time value.
+ entry_name: Name of the entry.
+ account_name: Name of the account.
+ text_description: Short description about the entry.
+ comments: String that contains the comments added by the user.
+ where: The domain name or IP where the password is used.
+ protocol: The internet protocol used (e.g. https).
+ type_protocol: The sub-protocol used (e.g. form).
+ ssgp_hash: String with hexadecimal values from the password / cert hash.
+ """ + super(KeychainInternetRecordEvent, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = timestamp_desc + self.entry_name = entry_name + self.account_name = account_name + self.text_description = text_description + self.where = where + self.protocol = protocol + self.type_protocol = type_protocol + self.comments = comments + self.ssgp_hash = ssgp_hash + + +class KeychainApplicationRecordEvent(event.EventObject): + """Convenience class for an keychain application password record event.""" + DATA_TYPE = 'mac:keychain:application' + + def __init__( + self, timestamp, timestamp_desc, entry_name, + account_name, text_description, comments, ssgp_hash): + """Initializes the event object. + + Args: + timestamp: Description of the timestamp value. + timestamp_desc: Timelib type of the timestamp. + entry_name: Name of the entry. + account_name: Name of the account. + text_description: Short description about the entry. + comments: String that contains the comments added by the user. + ssgp_hash: String with hexadecimal values from the password / cert hash. + """ + super(KeychainApplicationRecordEvent, self).__init__() + self.timestamp = timestamp + self.timestamp_desc = timestamp_desc + self.entry_name = entry_name + self.account_name = account_name + self.text_description = text_description + self.comments = comments + self.ssgp_hash = ssgp_hash + + +class KeychainParser(interface.BaseParser): + """Parser for Keychain files.""" + + NAME = 'mac_keychain' + DESCRIPTION = u'Parser for Mac OS X Keychain files.' + + KEYCHAIN_MAGIC_HEADER = 'kych' + KEYCHAIN_MAJOR_VERSION = 1 + KEYCHAIN_MINOR_VERSION = 0 + + RECORD_TYPE_APPLICATION = 0x80000000 + RECORD_TYPE_INTERNET = 0x80000001 + + # DB HEADER. + KEYCHAIN_DB_HEADER = construct.Struct( + 'db_header', + construct.String('magic', 4), + construct.UBInt16('major_version'), + construct.UBInt16('minor_version'), + construct.UBInt32('header_size'), + construct.UBInt32('schema_offset'), + construct.Padding(4)) + + # DB SCHEMA. + KEYCHAIN_DB_SCHEMA = construct.Struct( + 'db_schema', + construct.UBInt32('size'), + construct.UBInt32('number_of_tables')) + # For each number_of_tables, the schema has a TABLE_OFFSET with the + # offset starting in the DB_SCHEMA. 
+  TABLE_OFFSET = construct.UBInt32('table_offset')
+
+  TABLE_HEADER = construct.Struct(
+      'table_header',
+      construct.UBInt32('table_size'),
+      construct.UBInt32('record_type'),
+      construct.UBInt32('number_of_records'),
+      construct.UBInt32('first_record'),
+      construct.UBInt32('index_offset'),
+      construct.Padding(4),
+      construct.UBInt32('recordnumbercount'))
+
+  RECORD_HEADER = construct.Struct(
+      'record_header',
+      construct.UBInt32('entry_length'),
+      construct.Padding(12),
+      construct.UBInt32('ssgp_length'),
+      construct.Padding(4),
+      construct.UBInt32('creation_time'),
+      construct.UBInt32('last_mod_time'),
+      construct.UBInt32('text_description'),
+      construct.Padding(4),
+      construct.UBInt32('comments'),
+      construct.Padding(8),
+      construct.UBInt32('entry_name'),
+      construct.Padding(20),
+      construct.UBInt32('account_name'),
+      construct.Padding(4))
+  RECORD_HEADER_APP = construct.Struct(
+      'record_entry_app',
+      RECORD_HEADER,
+      construct.Padding(4))
+  RECORD_HEADER_INET = construct.Struct(
+      'record_entry_inet',
+      RECORD_HEADER,
+      construct.UBInt32('where'),
+      construct.UBInt32('protocol'),
+      construct.UBInt32('type'),
+      construct.Padding(4),
+      construct.UBInt32('url'))
+
+  TEXT = construct.PascalString(
+      'text', length_field=construct.UBInt32('length'))
+  TIME = construct.Struct(
+      'timestamp',
+      construct.String('year', 4),
+      construct.String('month', 2),
+      construct.String('day', 2),
+      construct.String('hour', 2),
+      construct.String('minute', 2),
+      construct.String('second', 2),
+      construct.Padding(2))
+  TYPE_TEXT = construct.String('type', 4)
+
+  # TODO: add more protocols.
+  _PROTOCOL_TRANSLATION_DICT = {
+      u'htps': u'https',
+      u'smtp': u'smtp',
+      u'imap': u'imap',
+      u'http': u'http'}
+
+  def _GetTimestampFromEntry(self, parser_context, file_entry, structure):
+    """Parses a TIME entry structure into microseconds since the epoch in UTC.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      structure: TIME entry structure:
+                 year: String with the number of the year.
+                 month: String with the number of the month.
+                 day: String with the number of the day.
+                 hour: String with the number of the hour.
+                 minute: String with the number of the minute.
+                 second: String with the number of the second.
+
+    Returns:
+      Microseconds since the epoch in UTC.
+    """
+    try:
+      return timelib.Timestamp.FromTimeParts(
+          int(structure.year, 10), int(structure.month, 10),
+          int(structure.day, 10), int(structure.hour, 10),
+          int(structure.minute, 10), int(structure.second, 10))
+    except ValueError:
+      logging.warning(
+          u'[{0:s}] Invalid keychain time {1!s} in file: {2:s}'.format(
+              self.NAME, structure, parser_context.GetDisplayName(file_entry)))
+      return 0
+
+  def _ReadEntryApplication(
+      self, parser_context, file_object, file_entry=None, parser_chain=None):
+    """Extracts the information from an application password entry.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_object: A file-like object that points to a Keychain file.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+ """ + offset = file_object.tell() + try: + record = self.RECORD_HEADER_APP.parse_stream(file_object) + except (IOError, construct.FieldError): + logging.warning(( + u'[{0:s}] Unsupported record header at 0x{1:08x} in file: ' + u'{2:s}').format( + self.NAME, offset, parser_context.GetDisplayName(file_entry))) + return + + (ssgp_hash, creation_time, last_mod_time, text_description, + comments, entry_name, account_name) = self._ReadEntryHeader( + parser_context, file_entry, file_object, record.record_header, offset) + + # Move to the end of the record, and then, prepared for the next record. + file_object.seek( + record.record_header.entry_length + offset - file_object.tell(), + os.SEEK_CUR) + event_object = KeychainApplicationRecordEvent( + creation_time, eventdata.EventTimestamp.CREATION_TIME, + entry_name, account_name, text_description, comments, ssgp_hash) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if creation_time != last_mod_time: + event_object = KeychainApplicationRecordEvent( + last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME, + entry_name, account_name, text_description, comments, ssgp_hash) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def _ReadEntryHeader( + self, parser_context, file_entry, file_object, record, offset): + """Read the common record attributes. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + file_object: A file-like object that points to an Keychain file. + record: Structure with the header of the record. + offset: First byte of the record. + + Returns: + A list of: + ssgp_hash: Hash of the encrypted data (passwd, cert, note). + creation_time: When the entry was created. + last_mod_time: Last time the entry was updated. + text_description: A brief description of the entry. + entry_name: Name of the entry + account_name: Name of the account. + """ + # Info: The hash header always start with the string ssgp follow by + # the hash. Furthermore The fields are always a multiple of four. + # Then if it is not multiple the value is padded by 0x00. + ssgp_hash = binascii.hexlify(file_object.read(record.ssgp_length)[4:]) + + file_object.seek( + record.creation_time - file_object.tell() + offset - 1, os.SEEK_CUR) + creation_time = self._GetTimestampFromEntry( + parser_context, file_entry, self.TIME.parse_stream(file_object)) + + file_object.seek( + record.last_mod_time - file_object.tell() + offset - 1, os.SEEK_CUR) + last_mod_time = self._GetTimestampFromEntry( + parser_context, file_entry, self.TIME.parse_stream(file_object)) + + # The comment field does not always contain data. + if record.text_description: + file_object.seek( + record.text_description - file_object.tell() + offset -1, + os.SEEK_CUR) + try: + text_description = self.TEXT.parse_stream(file_object) + except construct.FieldError: + text_description = u'N/A (error)' + else: + text_description = u'N/A' + + # The comment field does not always contain data. 
+    if record.comments:
+      # Seek to the comments field; like the other fields, its offset is
+      # relative to the first byte of the record.
+      file_object.seek(
+          record.comments - file_object.tell() + offset - 1,
+          os.SEEK_CUR)
+      try:
+        comments = self.TEXT.parse_stream(file_object)
+      except construct.FieldError:
+        comments = u'N/A (error)'
+    else:
+      comments = u'N/A'
+
+    file_object.seek(
+        record.entry_name - file_object.tell() + offset - 1, os.SEEK_CUR)
+    try:
+      entry_name = self.TEXT.parse_stream(file_object)
+    except construct.FieldError:
+      entry_name = u'N/A (error)'
+
+    file_object.seek(
+        record.account_name - file_object.tell() + offset - 1, os.SEEK_CUR)
+    try:
+      account_name = self.TEXT.parse_stream(file_object)
+    except construct.FieldError:
+      account_name = u'N/A (error)'
+
+    return (
+        ssgp_hash, creation_time, last_mod_time,
+        text_description, comments, entry_name, account_name)
+
+  def _ReadEntryInternet(
+      self, parser_context, file_object, file_entry=None, parser_chain=None):
+    """Extracts the information from an Internet password entry.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_object: A file-like object that points to a Keychain file.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    offset = file_object.tell()
+    try:
+      record = self.RECORD_HEADER_INET.parse_stream(file_object)
+    except (IOError, construct.FieldError):
+      logging.warning((
+          u'[{0:s}] Unsupported record header at 0x{1:08x} in file: '
+          u'{2:s}').format(
+              self.NAME, offset, parser_context.GetDisplayName(file_entry)))
+      return
+
+    (ssgp_hash, creation_time, last_mod_time, text_description,
+     comments, entry_name, account_name) = self._ReadEntryHeader(
+         parser_context, file_entry, file_object, record.record_header, offset)
+    if not record.where:
+      where = u'N/A'
+      protocol = u'N/A'
+      type_protocol = u'N/A'
+    else:
+      file_object.seek(
+          record.where - file_object.tell() + offset - 1, os.SEEK_CUR)
+      where = self.TEXT.parse_stream(file_object)
+      file_object.seek(
+          record.protocol - file_object.tell() + offset - 1, os.SEEK_CUR)
+      protocol = self.TYPE_TEXT.parse_stream(file_object)
+      file_object.seek(
+          record.type - file_object.tell() + offset - 1, os.SEEK_CUR)
+      type_protocol = self.TEXT.parse_stream(file_object)
+      type_protocol = self._PROTOCOL_TRANSLATION_DICT.get(
+          type_protocol, type_protocol)
+      if record.url:
+        file_object.seek(
+            record.url - file_object.tell() + offset - 1, os.SEEK_CUR)
+        url = self.TEXT.parse_stream(file_object)
+        where = u'{0:s}{1:s}'.format(where, url)
+
+    # Move to the end of the record to be ready for the next record.
+    file_object.seek(
+        record.record_header.entry_length + offset - file_object.tell(),
+        os.SEEK_CUR)
+
+    event_object = KeychainInternetRecordEvent(
+        creation_time, eventdata.EventTimestamp.CREATION_TIME,
+        entry_name, account_name, text_description,
+        comments, where, protocol, type_protocol, ssgp_hash)
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    if creation_time != last_mod_time:
+      event_object = KeychainInternetRecordEvent(
+          last_mod_time, eventdata.EventTimestamp.MODIFICATION_TIME,
+          entry_name, account_name, text_description,
+          comments, where, protocol, type_protocol, ssgp_hash)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+  def _VerifyStructure(self, file_object):
+    """Verifies that we are dealing with a Keychain file.
+
+    Args:
+      file_object: A file-like object that points to a Keychain file.
+
+    Returns:
+      A list of table positions if it is a keychain, None otherwise.
+    """
+    # INFO: The keychain header layout is:
+    # [DBHEADER] + [DBSCHEMA] + [OFFSET TABLE A] + ... + [OFFSET TABLE Z]
+    # Each table offset is relative to the first byte of the DB schema,
+    # so the size of the [DBHEADER] must be added to it.
+    try:
+      db_header = self.KEYCHAIN_DB_HEADER.parse_stream(file_object)
+    except (IOError, construct.FieldError):
+      return
+    if (db_header.minor_version != self.KEYCHAIN_MINOR_VERSION or
+        db_header.major_version != self.KEYCHAIN_MAJOR_VERSION or
+        db_header.magic != self.KEYCHAIN_MAGIC_HEADER):
+      return
+
+    # Read the database schema and extract the offsets for all the tables.
+    # They are ordered by file position from the top to the bottom of the file.
+    try:
+      db_schema = self.KEYCHAIN_DB_SCHEMA.parse_stream(file_object)
+    except (IOError, construct.FieldError):
+      return
+    table_offsets = []
+    for _ in range(db_schema.number_of_tables):
+      try:
+        table_offset = self.TABLE_OFFSET.parse_stream(file_object)
+      except (IOError, construct.FieldError):
+        return
+      table_offsets.append(table_offset + self.KEYCHAIN_DB_HEADER.sizeof())
+    return table_offsets
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extracts data from a Keychain file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Raises:
+      UnableToParseFile: when the file is not a Keychain file.
+    """
+    file_object = file_entry.GetFileObject()
+    table_offsets = self._VerifyStructure(file_object)
+    if not table_offsets:
+      file_object.close()
+      raise errors.UnableToParseFile(u'The file is not a Keychain file.')
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    for table_offset in table_offsets:
+      # Skip ahead to the table; the intervening bytes are unknown data.
+      file_object.seek(table_offset - file_object.tell(), os.SEEK_CUR)
+      try:
+        table = self.TABLE_HEADER.parse_stream(file_object)
+      except construct.FieldError as exception:
+        logging.warning((
+            u'[{0:s}] Unable to parse table header in file: {1:s} '
+            u'with error: {2:s}.').format(
+                self.NAME, parser_context.GetDisplayName(file_entry),
+                exception))
+        continue
+
+      # table_offset: absolute byte offset in the file where the table starts.
+      # table.first_record: offset of the first record in the table, relative
+      # to the first byte of the table.
+      file_object.seek(
+          table_offset + table.first_record - file_object.tell(), os.SEEK_CUR)
+
+      if table.record_type == self.RECORD_TYPE_INTERNET:
+        for _ in range(table.number_of_records):
+          self._ReadEntryInternet(
+              parser_context, file_object, file_entry=file_entry,
+              parser_chain=parser_chain)
+
+      elif table.record_type == self.RECORD_TYPE_APPLICATION:
+        for _ in range(table.number_of_records):
+          self._ReadEntryApplication(
+              parser_context, file_object, file_entry=file_entry,
+              parser_chain=parser_chain)
+
+    file_object.close()
+
+
+manager.ParsersManager.RegisterParser(KeychainParser)
diff --git a/plaso/parsers/mac_keychain_test.py b/plaso/parsers/mac_keychain_test.py
new file mode 100644
index 0000000..6fe352b
--- /dev/null
+++ b/plaso/parsers/mac_keychain_test.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Keychain password database parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import mac_keychain as mac_keychain_formatter
+from plaso.lib import eventdata
+from plaso.lib import timelib_test
+from plaso.parsers import mac_keychain
+from plaso.parsers import test_lib
+
+
+class MacKeychainParserTest(test_lib.ParserTestCase):
+  """Tests for the keychain file parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = mac_keychain.KeychainParser()
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath(['login.keychain'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEqual(len(event_objects), 5)
+
+    event_object = event_objects[0]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2014-01-26 14:51:48')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(
+        event_object.timestamp_desc,
+        eventdata.EventTimestamp.CREATION_TIME)
+    self.assertEqual(event_object.entry_name, u'Secret Application')
+    self.assertEqual(event_object.account_name, u'moxilo')
+    expected_ssgp = (
+        u'b8e44863af1cb0785b89681d22e2721997ccfb8adb8853e726aff94c8830b05a')
+    self.assertEqual(event_object.ssgp_hash, expected_ssgp)
+    self.assertEqual(event_object.text_description, u'N/A')
+    expected_msg = u'Name: Secret Application Account: moxilo'
+    expected_msg_short = u'Secret Application'
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+    event_object = event_objects[1]
+    self.assertEqual(
+        event_object.timestamp_desc,
+        eventdata.EventTimestamp.MODIFICATION_TIME)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2014-01-26 14:52:29')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    event_object = event_objects[2]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2014-01-26 14:53:29')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.entry_name, u'Secret Note')
+    self.assertEqual(event_object.text_description, u'secure note')
+    self.assertEqual(len(event_object.ssgp_hash), 1696)
+    expected_msg = u'Name: Secret Note'
+    expected_msg_short = u'Secret Note'
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+    event_object = event_objects[3]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2014-01-26 14:54:33')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.entry_name, u'plaso.kiddaland.net')
+    self.assertEqual(event_object.account_name, u'MrMoreno')
+    expected_ssgp = (
+        u'83ccacf55a8cb656d340ec405e9d8b308fac54bb79c5c9b0219bd0d700c3c521')
+    self.assertEqual(event_object.ssgp_hash, expected_ssgp)
+    self.assertEqual(event_object.where, u'plaso.kiddaland.net')
+    self.assertEqual(event_object.protocol, u'http')
+    self.assertEqual(event_object.type_protocol, u'dflt')
+    self.assertEqual(event_object.text_description, u'N/A')
+    expected_msg = (
+        u'Name: plaso.kiddaland.net Account: MrMoreno Where: '
+        u'plaso.kiddaland.net Protocol: http (dflt)')
+    expected_msg_short = u'plaso.kiddaland.net'
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/mac_securityd.py b/plaso/parsers/mac_securityd.py
new file mode 100644
index 0000000..c274204
--- /dev/null
+++ b/plaso/parsers/mac_securityd.py
@@ -0,0 +1,276 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the ASL securityd log plaintext parser."""
+
+import datetime
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)'
+
+
+# INFO:
+# http://opensource.apple.com/source/Security/Security-55471/sec/securityd/
+
+
+class MacSecuritydLogEvent(time_events.TimestampEvent):
+  """Convenience class for an ASL securityd line event."""
+
+  DATA_TYPE = 'mac:asl:securityd:line'
+
+  def __init__(
+      self, timestamp, structure, sender, sender_pid,
+      security_api, caller, message):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp time value, epoch.
+      structure: Structure with the parsed fields; the level and facility
+                 attributes are taken from it.
+      sender: String with the name of the sender.
+      sender_pid: Process id of the sender.
+      security_api: Securityd function name.
+      caller: The caller field, a string containing two hex numbers.
+      message: String with the ASL message.
+    """
+    super(MacSecuritydLogEvent, self).__init__(
+        timestamp,
+        eventdata.EventTimestamp.ADDED_TIME)
+    self.timestamp = timestamp
+    self.level = structure.level
+    self.sender_pid = sender_pid
+    self.facility = structure.facility
+    self.sender = sender
+    self.security_api = security_api
+    self.caller = caller
+    self.message = message
+
+
+class MacSecuritydLogParser(text_parser.PyparsingSingleLineTextParser):
+  """Parses the securityd file that contains logs from the security daemon."""
+
+  NAME = 'mac_securityd'
+  DESCRIPTION = u'Parser for Mac OS X securityd log files.'
+
+  ENCODING = u'utf-8'
+
+  # Default ASL securityd log line.
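+  # For illustration, a line of this form (reconstructed from the
+  # expectations in mac_securityd_test.py, wrapped here for length):
+  # Dec 26 19:11:59 secd[1111] <Error> [user{SOSCCThisDeviceIsInCircle}
+  # C0x7fff872fa482]: message text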
+  SECURITYD_LINE = (
+      text_parser.PyparsingConstants.MONTH.setResultsName('month') +
+      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') +
+      text_parser.PyparsingConstants.TIME.setResultsName('time') +
+      pyparsing.CharsNotIn(u'[').setResultsName('sender') +
+      pyparsing.Literal(u'[').suppress() +
+      text_parser.PyparsingConstants.PID.setResultsName('sender_pid') +
+      pyparsing.Literal(u']').suppress() +
+      pyparsing.Literal(u'<').suppress() +
+      pyparsing.CharsNotIn(u'>').setResultsName('level') +
+      pyparsing.Literal(u'>').suppress() +
+      pyparsing.Literal(u'[').suppress() +
+      pyparsing.CharsNotIn(u'{').setResultsName('facility') +
+      pyparsing.Literal(u'{').suppress() +
+      pyparsing.Optional(pyparsing.CharsNotIn(
+          u'}').setResultsName('security_api')) +
+      pyparsing.Literal(u'}').suppress() +
+      pyparsing.Optional(pyparsing.CharsNotIn(u']:').setResultsName('caller')) +
+      pyparsing.Literal(u']:').suppress() +
+      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('message'))
+
+  # Repeated line.
+  REPEATED_LINE = (
+      text_parser.PyparsingConstants.MONTH.setResultsName('month') +
+      text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') +
+      text_parser.PyparsingConstants.TIME.setResultsName('time') +
+      pyparsing.Literal(u'--- last message repeated').suppress() +
+      text_parser.PyparsingConstants.INTEGER.setResultsName('times') +
+      pyparsing.Literal(u'time ---').suppress())
+
+  # Define the available log line structures.
+  LINE_STRUCTURES = [
+      ('logline', SECURITYD_LINE),
+      ('repeated', REPEATED_LINE)]
+
+  def __init__(self):
+    """Initializes a parser object."""
+    super(MacSecuritydLogParser, self).__init__()
+    self._year_use = 0
+    self._last_month = None
+    self.previous_structure = None
+
+  def VerifyStructure(self, parser_context, line):
+    """Verify that this file is an ASL securityd log file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      line: A single line from the text file.
+
+    Returns:
+      True if this is the correct parser, False otherwise.
+    """
+    try:
+      line = self.SECURITYD_LINE.parseString(line)
+    except pyparsing.ParseException:
+      logging.debug(u'Not an ASL securityd log file')
+      return False
+    # Check whether the day, month and time are valid, using an arbitrary
+    # year.
+    month = timelib.MONTH_DICT.get(line.month.lower())
+    if not month:
+      return False
+    if self._GetTimestamp(line.day, month, 2012, line.time) == 0:
+      return False
+    return True
+
+  def ParseRecord(self, parser_context, key, structure):
+    """Parse each record structure and return an EventObject if applicable.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: An identification string indicating the name of the parsed
+           structure.
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+
+    Returns:
+      An event object (instance of EventObject) or None.
+    """
+    if key == 'repeated' or key == 'logline':
+      return self._ParseLogLine(parser_context, structure, key)
+    else:
+      logging.warning(
+          u'Unable to parse record, unknown structure: {0:s}'.format(key))
+
+  def _ParseLogLine(self, parser_context, structure, key):
+    """Parse a logline and store appropriate attributes.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+      key: An identification string indicating the name of the parsed
+           structure.
+
+    Returns:
+      An event object (instance of EventObject) or None.
+    """
+    # TODO: improve this to get a valid year.
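+    # The securityd log line does not carry the year: take it from the
+    # knowledge base if available, otherwise from the file creation time
+    # below, and as a last resort from the current year.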
+    if not self._year_use:
+      self._year_use = parser_context.year
+
+    if not self._year_use:
+      # Get it from the creation time of the file.
+      self._year_use = self._GetYear(
+          self.file_entry.GetStat(), parser_context.timezone)
+      # If that fails, get it from the current time.
+      if not self._year_use:
+        self._year_use = timelib.GetCurrentYear()
+
+    # Gap detected between years.
+    month = timelib.MONTH_DICT.get(structure.month.lower())
+    if not self._last_month:
+      self._last_month = month
+    if month < self._last_month:
+      self._year_use += 1
+    timestamp = self._GetTimestamp(
+        structure.day,
+        month,
+        self._year_use,
+        structure.time)
+    if not timestamp:
+      logging.debug(u'Invalid timestamp {0!s}'.format(structure))
+      return
+    self._last_month = month
+
+    if key == 'logline':
+      self.previous_structure = structure
+      message = structure.message
+    else:
+      times = structure.times
+      structure = self.previous_structure
+      message = u'Repeated {0:d} times: {1:s}'.format(
+          times, structure.message)
+
+    # The CharsNotIn pyparsing structure leaves whitespace
+    # at the beginning of the sender and the caller fields.
+    sender = structure.sender.strip()
+    caller = structure.caller.strip()
+    if not caller:
+      caller = u'unknown'
+    if not structure.security_api:
+      security_api = u'unknown'
+    else:
+      security_api = structure.security_api
+
+    return MacSecuritydLogEvent(
+        timestamp, structure, sender, structure.sender_pid, security_api,
+        caller, message)
+
+  def _GetTimestamp(self, day, month, year, time):
+    """Gets a timestamp from a pyparsing ParseResults timestamp.
+
+    This is a timestamp_string as returned by using
+    text_parser.PyparsingConstants structures:
+    08, Nov, [20, 36, 37]
+
+    Args:
+      day: An integer representing the day.
+      month: An integer representing the month.
+      year: An integer representing the year.
+      time: A list containing the hours, minutes, seconds.
+
+    Returns:
+      timestamp: A plaso timestamp.
+    """
+    hours, minutes, seconds = time
+    return timelib.Timestamp.FromTimeParts(
+        year, month, day, hours, minutes, seconds)
+
+  def _GetYear(self, stat, zone):
+    """Retrieves the year either from the input file or from the settings."""
+    time = getattr(stat, 'crtime', 0)
+    if not time:
+      time = getattr(stat, 'ctime', 0)
+
+    if not time:
+      current_year = timelib.GetCurrentYear()
+      logging.error((
+          u'Unable to determine year of log file.\nDefaulting to: '
+          u'{0:d}').format(current_year))
+      return current_year
+
+    try:
+      timestamp = datetime.datetime.fromtimestamp(time, zone)
+    except ValueError:
+      current_year = timelib.GetCurrentYear()
+      logging.error((
+          u'Unable to determine year of log file.\nDefaulting to: '
+          u'{0:d}').format(current_year))
+      return current_year
+
+    return timestamp.year
+
+
+manager.ParsersManager.RegisterParser(MacSecuritydLogParser)
diff --git a/plaso/parsers/mac_securityd_test.py b/plaso/parsers/mac_securityd_test.py
new file mode 100644
index 0000000..4265b3c
--- /dev/null
+++ b/plaso/parsers/mac_securityd_test.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a unit test for the ASL securityd log parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import mac_securityd as mac_securityd_formatter
+from plaso.lib import timelib_test
+from plaso.parsers import mac_securityd as mac_securityd_parser
+from plaso.parsers import test_lib
+
+
+class MacSecurityUnitTest(test_lib.ParserTestCase):
+  """A unit test for the ASL securityd log parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = mac_securityd_parser.MacSecuritydLogParser()
+
+  def testParseFile(self):
+    """Test parsing of an ASL securityd log file."""
+    knowledge_base_values = {'year': 2013}
+    test_file = self._GetTestFilePath(['security.log'])
+    event_queue_consumer = self._ParseFile(
+        self._parser, test_file, knowledge_base_values=knowledge_base_values)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEqual(len(event_objects), 9)
+
+    event_object = event_objects[0]
+    expected_msg = (
+        u'Sender: secd (1) Level: Error Facility: user '
+        u'Text: securityd_xpc_dictionary_handler EscrowSecurityAl'
+        u'[3273] DeviceInCircle \xdeetta \xe6tti a\xf0 '
+        u'virka l\xedka, setja \xedslensku inn.')
+    expected_msg_short = (
+        u'Text: securityd_xpc_dictionary_handler '
+        u'EscrowSecurityAl[3273] DeviceInCircle ...')
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-02-26 19:11:56')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.sender, u'secd')
+    self.assertEqual(event_object.sender_pid, 1)
+    self.assertEqual(event_object.facility, u'user')
+    self.assertEqual(event_object.security_api, u'unknown')
+    self.assertEqual(event_object.caller, u'unknown')
+    self.assertEqual(event_object.level, u'Error')
+    expected_msg = (
+        u'securityd_xpc_dictionary_handler EscrowSecurityAl'
+        u'[3273] DeviceInCircle \xdeetta \xe6tti a\xf0 virka '
+        u'l\xedka, setja \xedslensku inn.')
+    self.assertEqual(event_object.message, expected_msg)
+
+    event_object = event_objects[1]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-12-26 19:11:57')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.sender, u'secd')
+    self.assertEqual(event_object.sender_pid, 11)
+    self.assertEqual(event_object.facility, u'serverxpc')
+    self.assertEqual(event_object.security_api, u'SOSCCThisDeviceIsInCircle')
+    self.assertEqual(event_object.caller, u'unknown')
+    self.assertEqual(event_object.level, u'Notice')
+
+    event_object = event_objects[2]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-12-26 19:11:58')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.sender, u'secd')
+    self.assertEqual(event_object.sender_pid, 111)
+    self.assertEqual(event_object.facility, u'user')
+    self.assertEqual(event_object.security_api, u'unknown')
+    self.assertEqual(event_object.caller, u'unknown')
+    self.assertEqual(event_object.level, u'Debug')
+
+    event_object = event_objects[3]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-12-26 19:11:59')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.sender, u'secd')
+    self.assertEqual(event_object.sender_pid, 1111)
+    self.assertEqual(event_object.facility, u'user')
+    self.assertEqual(event_object.security_api, u'SOSCCThisDeviceIsInCircle')
+    self.assertEqual(event_object.caller, u'C0x7fff872fa482')
+    self.assertEqual(event_object.level, u'Error')
+
+    event_object = event_objects[4]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-12-06 19:11:01')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.sender, u'secd')
+    self.assertEqual(event_object.sender_pid, 1)
+    self.assertEqual(event_object.facility, u'user')
+    self.assertEqual(event_object.security_api, u'unknown')
+    self.assertEqual(event_object.caller, u'unknown')
+    self.assertEqual(event_object.level, u'Error')
+    self.assertEqual(event_object.message, u'')
+
+    event_object = event_objects[5]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-12-06 19:11:02')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    self.assertEqual(event_object.sender, u'secd')
+    self.assertEqual(event_object.sender_pid, 11111)
+    self.assertEqual(event_object.facility, u'user')
+    self.assertEqual(event_object.security_api, u'SOSCCThisDeviceIsInCircle')
+    self.assertEqual(event_object.caller, u'C0x7fff872fa482 F0x106080db0')
+    self.assertEqual(event_object.level, u'Error')
+    self.assertEqual(event_object.message, u'')
+
+    event_object = event_objects[6]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-12-31 23:59:59')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    event_object = event_objects[7]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2014-03-01 00:00:01')
+    self.assertEqual(event_object.timestamp, expected_timestamp)
+
+    # Repeated line.
+    event_object = event_objects[8]
+    expected_msg = u'Repeated 3 times: Happy new year!'
+    self.assertEqual(event_object.message, expected_msg)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/mac_wifi.py b/plaso/parsers/mac_wifi.py
new file mode 100644
index 0000000..99e1412
--- /dev/null
+++ b/plaso/parsers/mac_wifi.py
@@ -0,0 +1,280 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the wifi.log (Mac OS X) parser.""" + +import datetime +import logging +import re + +import pyparsing + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import manager +from plaso.parsers import text_parser + + +__author__ = 'Joaquin Moreno Garijo (bastionado@gmail.com)' + + +class MacWifiLogEvent(time_events.TimestampEvent): + """Convenience class for a Mac Wifi log line event.""" + + DATA_TYPE = 'mac:wifilog:line' + + def __init__(self, timestamp, agent, function, text, action): + """Initializes the event object. + + Args: + timestamp: The timestamp time value, epoch. + source_code: Details of the source code file generating the event. + log_level: The log level used for the event. + text: The log message + action: A string containing known WiFI actions, eg: connected to + an AP, configured, etc. If the action is not known, + the value is the message of the log (text variable). + """ + super(MacWifiLogEvent, self).__init__( + timestamp, eventdata.EventTimestamp.ADDED_TIME) + self.agent = agent + self.function = function + self.text = text + self.action = action + + +class MacWifiLogParser(text_parser.PyparsingSingleLineTextParser): + """Parse text based on wifi.log file.""" + + NAME = 'macwifi' + DESCRIPTION = u'Parser for Mac OS X wifi.log files.' + + ENCODING = u'utf-8' + + # Regular expressions for known actions. + RE_CONNECTED = re.compile(r'Already\sassociated\sto\s(.*)\.\sBailing') + RE_WIFI_PARAMETERS = re.compile( + r'\[ssid=(.*?), bssid=(.*?), security=(.*?), rssi=') + + # Define how a log line should look like. + WIFI_LINE = ( + text_parser.PyparsingConstants.MONTH.setResultsName('day_of_week') + + text_parser.PyparsingConstants.MONTH.setResultsName('month') + + text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') + + text_parser.PyparsingConstants.TIME_MSEC.setResultsName('time') + + pyparsing.Literal(u'<') + + pyparsing.CharsNotIn(u'>').setResultsName('agent') + + pyparsing.Literal(u'>') + + pyparsing.CharsNotIn(u':').setResultsName('function') + + pyparsing.Literal(u':') + + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text')) + + WIFI_HEADER = ( + text_parser.PyparsingConstants.MONTH.setResultsName('day_of_week') + + text_parser.PyparsingConstants.MONTH.setResultsName('month') + + text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day') + + text_parser.PyparsingConstants.TIME_MSEC.setResultsName('time') + + pyparsing.Literal(u'***Starting Up***')) + + # Define the available log line structures. + LINE_STRUCTURES = [ + ('logline', WIFI_LINE), + ('header', WIFI_HEADER)] + + def __init__(self): + """Initializes a parser object.""" + super(MacWifiLogParser, self).__init__() + self._year_use = 0 + self._last_month = None + + def _GetAction(self, agent, function, text): + """Parse the well know actions for easy reading. + + Args: + agent: The device that generate the entry. + function: The function or action called by the agent. + text: Mac Wifi log text. + + Returns: + know_action: A formatted string representing the known (or common) action. 
+ """ + if not agent.startswith('airportd'): + return text + + if 'airportdProcessDLILEvent' in function: + interface = text.split()[0] + return u'Interface {0:s} turn up.'.format(interface) + + if 'doAutoJoin' in function: + match = re.match(self.RE_CONNECTED, text) + if match: + ssid = match.group(1)[1:-1] + else: + ssid = 'Unknown' + return u'Wifi connected to SSID {0:s}'.format(ssid) + + if 'processSystemPSKAssoc' in function: + wifi_parameters = self.RE_WIFI_PARAMETERS.search(text) + if wifi_parameters: + ssid = wifi_parameters.group(1) + bssid = wifi_parameters.group(2) + security = wifi_parameters.group(3) + if not ssid: + ssid = 'Unknown' + if not bssid: + bssid = 'Unknown' + if not security: + security = 'Unknown' + return ( + u'New wifi configured. BSSID: {0:s}, SSID: {1:s}, ' + u'Security: {2:s}.').format(bssid, ssid, security) + return text + + def _GetTimestamp(self, day, month, year, time): + """Gets a timestamp from a pyparsing ParseResults timestamp. + + This is a timestamp_string as returned by using + text_parser.PyparsingConstants structures: + 08, Nov, [20, 36, 37], 222] + + Args: + timestamp_string: The pyparsing ParseResults object + + Returns: + day: An integer representing the day. + month: An integer representing the month. + year: An integer representing the year. + timestamp: A plaso timelib timestamp event or 0. + """ + try: + time_part, millisecond = time + hour, minute, second = time_part + timestamp = timelib.Timestamp.FromTimeParts( + year, month, day, hour, minute, second, + microseconds=(millisecond * 1000)) + except ValueError: + timestamp = 0 + return timestamp + + def _GetYear(self, stat, zone): + """Retrieves the year either from the input file or from the settings.""" + time = getattr(stat, 'crtime', 0) + if not time: + time = getattr(stat, 'ctime', 0) + + if not time: + logging.error( + ('Unable to determine correct year of syslog file, using current ' + 'year')) + return timelib.GetCurrentYear() + + try: + timestamp = datetime.datetime.fromtimestamp(time, zone) + except ValueError as exception: + logging.error(( + u'Unable to determine correct year of syslog file, using current ' + u'one, with error: {0:s}').format(exception)) + return timelib.GetCurrentYear() + return timestamp.year + + def _ParseLogLine(self, parser_context, structure): + """Parse a logline and store appropriate attributes. + + Args: + parser_context: A parser context object (instance of ParserContext). + structure: A pyparsing.ParseResults object from a line in the + log file. + + Returns: + An event object (instance of EventObject) or None. + """ + # TODO: improving this to get a valid year. + if not self._year_use: + self._year_use = parser_context.year + + if not self._year_use: + # Get from the creation time of the file. + self._year_use = self._GetYear( + self.file_entry.GetStat(), parser_context.timezone) + # If fail, get from the current time. + if not self._year_use: + self._year_use = timelib.GetCurrentYear() + + # Gap detected between years. + month = timelib.MONTH_DICT.get(structure.month.lower()) + if not self._last_month: + self._last_month = month + if month < self._last_month: + self._year_use += 1 + timestamp = self._GetTimestamp( + structure.day, + month, + self._year_use, + structure.time) + if not timestamp: + logging.debug(u'Invalid timestamp {0:s}'.format(structure.timestamp)) + return + self._last_month = month + + text = structure.text + + # Due to the use of CharsNotIn pyparsing structure contains whitespaces + # that need to be removed. 
+ function = structure.function.strip() + action = self._GetAction(structure.agent, function, text) + return MacWifiLogEvent( + timestamp, structure.agent, function, text, action) + + def ParseRecord(self, parser_context, key, structure): + """Parse each record structure and return an EventObject if applicable. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: An identification string indicating the name of the parsed + structure. + structure: A pyparsing.ParseResults object from a line in the + log file. + + Returns: + An event object (instance of EventObject) or None. + """ + if key == 'logline': + return self._ParseLogLine(parser_context, structure) + elif key != 'header': + logging.warning( + u'Unable to parse record, unknown structure: {0:s}'.format(key)) + + def VerifyStructure(self, parser_context, line): + """Verify that this file is a Mac Wifi log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + line: A single line from the text file. + + Returns: + True if this is the correct parser, False otherwise. + """ + try: + _ = self.WIFI_HEADER.parseString(line) + except pyparsing.ParseException: + logging.debug(u'Not a Mac Wifi log file') + return False + return True + + +manager.ParsersManager.RegisterParser(MacWifiLogParser) diff --git a/plaso/parsers/mac_wifi_test.py b/plaso/parsers/mac_wifi_test.py new file mode 100644 index 0000000..762c233 --- /dev/null +++ b/plaso/parsers/mac_wifi_test.py @@ -0,0 +1,134 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Mac wifi.log parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import mac_wifi as mac_wifi_formatter +from plaso.lib import timelib_test +from plaso.parsers import mac_wifi +from plaso.parsers import test_lib + + +class MacWifiUnitTest(test_lib.ParserTestCase): + """Tests for the Mac wifi.log parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = mac_wifi.MacWifiLogParser() + + def testParse(self): + """Tests the Parse function.""" + knowledge_base_values = {'year': 2013} + test_file = self._GetTestFilePath(['wifi.log']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 9) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-14 20:36:37.222') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.agent, u'airportd[88]') + self.assertEqual(event_object.function, u'airportdProcessDLILEvent') + self.assertEqual(event_object.action, u'Interface en0 turn up.') + self.assertEqual(event_object.text, u'en0 attached (up)') + + expected_msg = ( + u'Action: Interface en0 turn up. ' + u'(airportdProcessDLILEvent) ' + u'Log: en0 attached (up)') + expected_msg_short = ( + u'Action: Interface en0 turn up.') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-14 20:36:43.818') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.agent, u'airportd[88]') + self.assertEqual(event_object.function, u'_doAutoJoin') + self.assertEqual(event_object.action, u'Wifi connected to SSID CampusNet') + + expected_text = ( + u'Already associated to \u201cCampusNet\u201d. Bailing on auto-join.') + self.assertEqual(event_object.text, expected_text) + + event_object = event_objects[2] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-14 21:50:52.395') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.agent, u'airportd[88]') + self.assertEqual(event_object.function, u'_handleLinkEvent') + + expected_string = ( + u'Unable to process link event, op mode request returned -3903 ' + u'(Operation not supported)') + + self.assertEqual(event_object.action, expected_string) + self.assertEqual(event_object.text, expected_string) + + event_object = event_objects[5] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-14 21:52:09.883') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(u'airportd[88]', event_object.agent) + self.assertEqual(u'_processSystemPSKAssoc', event_object.function) + + expected_action = ( + u'New wifi configured. 
BSSID: 88:30:8a:7a:61:88, SSID: AndroidAP, ' + u'Security: WPA2 Personal.') + + self.assertEqual(event_object.action, expected_action) + + expected_text = ( + u'No password for network ' + u'[ssid=AndroidAP, bssid=88:30:8a:7a:61:88, security=WPA2 ' + u'Personal, rssi=-21, channel= ' + u'[channelNumber=11(2GHz), channelWidth={20MHz}], ibss=0] ' + u'in the system keychain') + + self.assertEqual(event_object.text, expected_text) + + event_object = event_objects[7] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-12-31 23:59:38.165') + self.assertEqual(event_object.timestamp, expected_timestamp) + + event_object = event_objects[8] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-01-01 01:12:17.311') + self.assertEqual(event_object.timestamp, expected_timestamp) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/mactime.py b/plaso/parsers/mactime.py new file mode 100644 index 0000000..9a2a882 --- /dev/null +++ b/plaso/parsers/mactime.py @@ -0,0 +1,153 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for the Sleuthkit (TSK) bodyfile or mactime format. + +The format specifications can be read here: + http://wiki.sleuthkit.org/index.php?title=Body_file +""" + +import re + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import manager +from plaso.parsers import text_parser + + +class MactimeEvent(time_events.PosixTimeEvent): + """Convenience class for a mactime-based event.""" + + DATA_TYPE = 'fs:mactime:line' + + def __init__(self, posix_time, usage, row_offset, data): + """Initializes a mactime-based event object. + + Args: + posix_time: The POSIX time value. + usage: The description of the usage of the time value. + row_offset: The offset of the row. + data: A dict object containing extracted data from the body file. + """ + super(MactimeEvent, self).__init__(posix_time, usage) + self.offset = row_offset + self.user_sid = unicode(data.get('uid', u'')) + self.user_gid = data.get('gid', None) + self.md5 = data.get('md5', None) + self.filename = data.get('name', 'N/A') + # Check if the filename field is not a string, eg in the instances where a + # filename only conists of numbers. In that case the self.filename field + # becomes an integer value instead of a string value. That causes issues + # later in the process, where we expect the filename value to be a string. 
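+    # For example, a bodyfile row describing a file literally named
+    # "123456" would otherwise yield the integer 123456 here.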
+    if not isinstance(self.filename, basestring):
+      self.filename = unicode(self.filename)
+
+    self.mode_as_string = data.get('mode_as_string', None)
+    self.size = data.get('size', None)
+
+    inode_number = data.get('inode', 0)
+    if isinstance(inode_number, basestring):
+      if '-' in inode_number:
+        inode_number, _, _ = inode_number.partition('-')
+
+      try:
+        inode_number = int(inode_number, 10)
+      except ValueError:
+        inode_number = 0
+
+    self.inode = inode_number
+
+
+class MactimeParser(text_parser.TextCSVParser):
+  """Parses SleuthKit's mactime bodyfiles."""
+
+  NAME = 'mactime'
+  DESCRIPTION = u'Parser for SleuthKit\'s mactime bodyfiles.'
+
+  COLUMNS = [
+      'md5', 'name', 'inode', 'mode_as_string', 'uid', 'gid', 'size',
+      'atime', 'mtime', 'ctime', 'crtime']
+  VALUE_SEPARATOR = '|'
+
+  MD5_RE = re.compile('^[0-9a-fA-F]+$')
+
+  _TIMESTAMP_DESC_MAP = {
+      'atime': eventdata.EventTimestamp.ACCESS_TIME,
+      'crtime': eventdata.EventTimestamp.CREATION_TIME,
+      'ctime': eventdata.EventTimestamp.CHANGE_TIME,
+      'mtime': eventdata.EventTimestamp.MODIFICATION_TIME,
+  }
+
+  def VerifyRow(self, unused_parser_context, row):
+    """Verify we are dealing with a mactime bodyfile.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: A single row from the CSV file.
+
+    Returns:
+      True if this is the correct parser, False otherwise.
+    """
+    if not self.MD5_RE.match(row['md5']):
+      return False
+
+    try:
+      # Verify that the "size" field is a string representation of an
+      # integer by casting it to int and back to string and comparing.
+      # If the value is not such a representation, e.g. '12a', the
+      # conversion fails and we return False.
+      if str(int(row.get('size', '0'), 10)) != row.get('size', None):
+        return False
+    except ValueError:
+      return False
+
+    # TODO: Add additional verification.
+    return True
+
+  def ParseRow(
+      self, parser_context, row_offset, row, file_entry=None,
+      parser_chain=None):
+    """Parses a row and extract event objects.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row_offset: The offset of the row.
+      row: A dictionary containing all the fields as denoted in the
+           COLUMNS class list.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    for key, value in row.iteritems():
+      if isinstance(row[key], basestring):
+        try:
+          row[key] = int(value, 10)
+        except ValueError:
+          pass
+
+    for key, timestamp_description in self._TIMESTAMP_DESC_MAP.iteritems():
+      value = row.get(key, None)
+      if not value:
+        continue
+      event_object = MactimeEvent(
+          value, timestamp_description, row_offset, row)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+manager.ParsersManager.RegisterParser(MactimeParser)
diff --git a/plaso/parsers/mactime_test.py b/plaso/parsers/mactime_test.py
new file mode 100644
index 0000000..c49f0ec
--- /dev/null
+++ b/plaso/parsers/mactime_test.py
@@ -0,0 +1,105 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the mactime parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import mactime as mactime_formatter
+from plaso.lib import eventdata
+from plaso.lib import timelib_test
+from plaso.parsers import mactime
+from plaso.parsers import test_lib
+from plaso.serializer import protobuf_serializer
+
+
+class MactimeUnitTest(test_lib.ParserTestCase):
+  """Tests for the mactime parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = mactime.MactimeParser()
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath(['mactime.body'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    # The file contains 13 lines x 4 timestamps per line, which would be
+    # 52 events in total. However several of these events have an empty
+    # timestamp value and are omitted.
+    # Total entries: 11 * 3 + 2 * 4 = 41
+    self.assertEquals(len(event_objects), 41)
+
+    # Test this entry:
+    # 0|/a_directory/another_file|16|r/rrw-------|151107|5000|22|1337961583|
+    # 1337961584|1337961585|0
+    event_object = event_objects[6]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        u'2012-05-25 15:59:43+00:00')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.ACCESS_TIME)
+    self.assertEquals(event_object.inode, 16)
+
+    expected_string = u'/a_directory/another_file'
+    self._TestGetMessageStrings(event_object, expected_string, expected_string)
+
+    event_object = event_objects[8]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        u'2012-05-25 15:59:44+00:00')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.MODIFICATION_TIME)
+
+    event_object = event_objects[7]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        u'2012-05-25 15:59:45+00:00')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.CHANGE_TIME)
+    self.assertEquals(event_object.filename, u'/a_directory/another_file')
+    self.assertEquals(event_object.mode_as_string, u'r/rrw-------')
+
+    event_object = event_objects[37]
+
+    self.assertEquals(event_object.inode, 4)
+
+    # Serialize the event objects.
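+    # Run every event object through the protobuf serializer to make sure
+    # all attribute values can be serialized.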
+ serialized_events = [] + serializer = protobuf_serializer.ProtobufEventObjectSerializer + for event_object in event_objects: + serialized_events.append(serializer.WriteSerialized(event_object)) + + self.assertEquals(len(serialized_events), len(event_objects)) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/manager.py b/plaso/parsers/manager.py new file mode 100644 index 0000000..976cf2b --- /dev/null +++ b/plaso/parsers/manager.py @@ -0,0 +1,205 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The parsers and plugins manager objects.""" + +from plaso.frontend import presets + + +class ParsersManager(object): + """Class that implements the parsers manager.""" + + _parser_classes = {} + + @classmethod + def DeregisterParser(cls, parser_class): + """Deregisters a parser class. + + The parser classes are identified based on their lower case name. + + Args: + parser_class: the class object of the parser. + + Raises: + KeyError: if parser class is not set for the corresponding name. + """ + parser_name = parser_class.NAME.lower() + if parser_name not in cls._parser_classes: + raise KeyError( + u'Parser class not set for name: {0:s}.'.format( + parser_class.NAME)) + + del cls._parser_classes[parser_name] + + @classmethod + def GetFilterListsFromString(cls, parser_filter_string): + """Determines an include and exclude list of parser and plugin names. + + Takes a comma separated string and splits it up into two lists, + of parsers or plugins to include and to exclude from selection. + If a particular filter is prepended with a minus sign it will + be included in the exclude section, otherwise in the include. + + Args: + parser_filter_string: The parser filter string. + + Returns: + A tuple of two lists, include and exclude. + """ + includes = [] + excludes = [] + + preset_categories = presets.categories.keys() + + for filter_string in parser_filter_string.split(','): + filter_string = filter_string.strip() + if not filter_string: + continue + + if filter_string.startswith('-'): + active_list = excludes + filter_string = filter_string[1:] + else: + active_list = includes + + filter_string = filter_string.lower() + if filter_string in cls._parser_classes: + parser_class = cls._parser_classes[filter_string] + active_list.append(filter_string) + + if parser_class.SupportsPlugins(): + active_list.extend(parser_class.GetPluginNames()) + + elif filter_string in preset_categories: + active_list.extend( + presets.GetParsersFromCategory(filter_string)) + + else: + active_list.append(filter_string) + + return includes, excludes + + @classmethod + def GetParserNames(cls, parser_filter_string=None): + """Retrieves the parser names. + + Args: + parser_filter_string: Optional parser filter string. The default is None. + + Returns: + A list of parser names. 
+ """ + parser_names = [] + + for parser_name, _ in cls.GetParsers( + parser_filter_string=parser_filter_string): + parser_names.append(parser_name) + + return parser_names + + @classmethod + def GetParserObjects(cls, parser_filter_string=None): + """Retrieves the parser objects. + + Args: + parser_filter_string: Optional parser filter string. The default is None. + + Returns: + A list of parser objects (instances of BaseParser). + """ + parser_objects = [] + + for _, parser_class in cls.GetParsers( + parser_filter_string=parser_filter_string): + parser_object = parser_class() + parser_objects.append(parser_object) + + return parser_objects + + @classmethod + def GetParsers(cls, parser_filter_string=None): + """Retrieves the registered parsers. + + Args: + parser_filter_string: Optional parser filter string. The default is None. + + Yields: + A tuple that contains the uniquely identifying name of the parser + and the parser class (subclass of BaseParser). + """ + if parser_filter_string: + includes, excludes = cls.GetFilterListsFromString(parser_filter_string) + else: + includes = None + excludes = None + + for parser_name, parser_class in cls._parser_classes.iteritems(): + if excludes and parser_name in excludes: + continue + + if includes and parser_name not in includes: + continue + + yield parser_name, parser_class + + @classmethod + def GetWindowsRegistryPlugins(cls): + """Build a list of all available Windows Registry plugins. + + Returns: + A plugins list (instance of PluginList). + """ + parser_class = cls._parser_classes.get('winreg', None) + if not parser_class: + return + + return parser_class.GetPluginList() + + @classmethod + def RegisterParser(cls, parser_class): + """Registers a parser class. + + The parser classes are identified based on their lower case name. + + Args: + parser_class: the class object of the parser. + + Raises: + KeyError: if parser class is already set for the corresponding name. + """ + parser_name = parser_class.NAME.lower() + if parser_name in cls._parser_classes: + raise KeyError(( + u'Parser class already set for name: {0:s}.').format( + parser_class.NAME)) + + cls._parser_classes[parser_name] = parser_class + + @classmethod + def RegisterParsers(cls, parser_classes): + """Registers parser classes. + + The parser classes are identified based on their lower case name. + + Args: + parser_classes: a list of class objects of the parsers. + + Raises: + KeyError: if parser class is already set for the corresponding name. + """ + for parser_class in parser_classes: + cls.RegisterParser(parser_class) diff --git a/plaso/parsers/manager_test.py b/plaso/parsers/manager_test.py new file mode 100644 index 0000000..755cd61 --- /dev/null +++ b/plaso/parsers/manager_test.py @@ -0,0 +1,152 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the parsers manager.""" + +import unittest + +from plaso.parsers import interface +from plaso.parsers import manager +from plaso.parsers import plugins + + +class TestParser(interface.BaseParser): + """Test parser.""" + + NAME = 'test_parser' + DESCRIPTION = u'Test parser.' + + def Parse(self, unused_parser_context, unused_file_entry, parser_chain=None): + """Parsers the file entry and extracts event objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + return + + +class TestParserWithPlugins(interface.BasePluginsParser): + """Test parser with plugins.""" + + NAME = 'test_parser_with_plugins' + DESCRIPTION = u'Test parser with plugins.' + + _plugin_classes = {} + + def Parse(self, unused_parser_context, unused_file_entry, parser_chain=None): + """Parsers the file entry and extracts event objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + return + + +class TestPlugin(plugins.BasePlugin): + """Test plugin.""" + + NAME = 'test_plugin' + DESCRIPTION = u'Test plugin.' + + def Process(self, unused_parser_context, unused_parser_chain=None, **kwargs): + """Evaluates if this is the correct plugin and processes data accordingly. + + Args: + parser_context: A parser context object (instance of ParserContext). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + kwargs: Depending on the plugin they may require different sets of + arguments to be able to evaluate whether or not this is + the correct plugin. + + Raises: + ValueError: When there are unused keyword arguments. 
+ """ + return + + +class ParsersManagerTest(unittest.TestCase): + """Tests for the parsers manager.""" + + def testParserRegistration(self): + """Tests the RegisterParser and DeregisterParser functions.""" + # pylint: disable=protected-access + number_of_parsers = len(manager.ParsersManager._parser_classes) + + manager.ParsersManager.RegisterParser(TestParser) + self.assertEquals( + len(manager.ParsersManager._parser_classes), + number_of_parsers + 1) + + with self.assertRaises(KeyError): + manager.ParsersManager.RegisterParser(TestParser) + + manager.ParsersManager.DeregisterParser(TestParser) + self.assertEquals( + len(manager.ParsersManager._parser_classes), + number_of_parsers) + + def testPluginRegistration(self): + """Tests the RegisterPlugin and DeregisterPlugin functions.""" + TestParserWithPlugins.RegisterPlugin(TestPlugin) + # pylint: disable=protected-access + self.assertEquals( + len(TestParserWithPlugins._plugin_classes), 1) + + with self.assertRaises(KeyError): + TestParserWithPlugins.RegisterPlugin(TestPlugin) + + TestParserWithPlugins.DeregisterPlugin(TestPlugin) + self.assertEquals( + len(TestParserWithPlugins._plugin_classes), 0) + + def testGetFilterListsFromString(self): + """Tests the GetFilterListsFromString function.""" + TestParserWithPlugins.RegisterPlugin(TestPlugin) + manager.ParsersManager.RegisterParser(TestParserWithPlugins) + manager.ParsersManager.RegisterParser(TestParser) + + includes, excludes = manager.ParsersManager.GetFilterListsFromString( + 'test_parser') + + self.assertEquals(includes, ['test_parser']) + self.assertEquals(excludes, []) + + includes, excludes = manager.ParsersManager.GetFilterListsFromString( + '-test_parser') + + self.assertEquals(includes, []) + self.assertEquals(excludes, ['test_parser']) + + includes, excludes = manager.ParsersManager.GetFilterListsFromString( + 'test_parser_with_plugins') + + self.assertEquals(includes, ['test_parser_with_plugins', 'test_plugin']) + + TestParserWithPlugins.DeregisterPlugin(TestPlugin) + manager.ParsersManager.DeregisterParser(TestParserWithPlugins) + manager.ParsersManager.DeregisterParser(TestParser) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/mcafeeav.py b/plaso/parsers/mcafeeav.py new file mode 100644 index 0000000..fd29122 --- /dev/null +++ b/plaso/parsers/mcafeeav.py @@ -0,0 +1,141 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for McAfee Anti-Virus Logs. 
+ +McAfee AV uses 4 logs to track when scans were run, when virus databases were +updated, and when files match the virus database.""" + +import logging + +from plaso.events import text_events +from plaso.lib import timelib +from plaso.parsers import manager +from plaso.parsers import text_parser + + +class McafeeAVEvent(text_events.TextEvent): + """Convenience class for McAfee AV Log events """ + DATA_TYPE = 'av:mcafee:accessprotectionlog' + + def __init__(self, timestamp, offset, attributes): + """Initializes a McAfee AV Log Event. + + Args: + timestamp: The timestamp time value. The timestamp contains the + number of seconds since Jan 1, 1970 00:00:00 UTC. + offset: The offset of the attributes. + attributes: Dict of elements from the AV log line. + """ + del attributes['time'] + del attributes['date'] + super(McafeeAVEvent, self).__init__(timestamp, offset, attributes) + self.full_path = attributes['filename'] + + +class McafeeAccessProtectionParser(text_parser.TextCSVParser): + """Parses the McAfee AV Access Protection Log.""" + + NAME = 'mcafee_protection' + DESCRIPTION = u'Parser for McAfee AV Access Protection log files.' + + VALUE_SEPARATOR = '\t' + # Define the columns of the McAfee AV Access Protection Log. + COLUMNS = ['date', 'time', 'status', 'username', 'filename', + 'trigger_location', 'rule', 'action'] + + def _GetTimestamp(self, date, time, timezone): + """Return a 64-bit signed timestamp in microseconds since Epoch. + + The timestamp is made up of two strings, the date and the time, separated + by a tab. The time is in local time. The month and day can be either 1 or 2 + characters long. E.g.: 7/30/2013\t10:22:48 AM + + Args: + date: The string representing the date. + time: The string representing the time. + timezone: The timezone object. + + Returns: + A plaso timestamp value, microseconds since Epoch in UTC or None. + """ + + if not (date and time): + logging.warning('Unable to extract timestamp from McAfee AV logline.') + return + + # TODO: Figure out how McAfee sets Day First and use that here. + # The in-file time format is '07/30/2013\t10:22:48 AM'. + return timelib.Timestamp.FromTimeString( + u'{0:s} {1:s}'.format(date, time), timezone=timezone) + + def VerifyRow(self, parser_context, row): + """Verify that this is a McAfee AV Access Protection Log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: A single row from the CSV file. + + Returns: + True if this is the correct parser, False otherwise. + """ + if len(row) != 8: + return False + + # This file can have a UTF-8 byte-order-marker at the beginning of + # the first row. + # TODO: Find out all the code pages this can have. Asked McAfee 10/31. + if row['date'][0:3] == '\xef\xbb\xbf': + row['date'] = row['date'][3:] + + # Check the date format! + # If it doesn't pass, then this isn't a McAfee AV Access Protection Log + try: + self._GetTimestamp(row['date'], row['time'], parser_context.timezone) + except (TypeError, ValueError): + return False + + # Use the presence of these strings as a backup or in case of partial file. + if (not 'Access Protection' in row['status'] and + not 'Would be blocked' in row['status']): + return False + + return True + + def ParseRow( + self, parser_context, row_offset, row, file_entry=None, + parser_chain=None): + """Parses a row and extract event objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + row_offset: The offset of the row. 
+ row: A dictionary containing all the fields as denoted in the + COLUMNS class list. + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + timestamp = self._GetTimestamp( + row['date'], row['time'], parser_context.timezone) + event_object = McafeeAVEvent(timestamp, row_offset, row) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +manager.ParsersManager.RegisterParser(McafeeAccessProtectionParser) diff --git a/plaso/parsers/mcafeeav_test.py b/plaso/parsers/mcafeeav_test.py new file mode 100644 index 0000000..24cb137 --- /dev/null +++ b/plaso/parsers/mcafeeav_test.py @@ -0,0 +1,79 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the McAfee AV Log parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import mcafeeav as mcafeeav_formatter +from plaso.parsers import mcafeeav +from plaso.parsers import test_lib + + +class McafeeAccessProtectionUnitTest(test_lib.ParserTestCase): + """Tests for the McAfee AV Log parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = mcafeeav.McafeeAccessProtectionParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['AccessProtectionLog.txt']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The file contains 14 lines which results in 14 event objects. + self.assertEquals(len(event_objects), 14) + + # Test that the UTF-8 byte order mark gets removed from the first line. 
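+    # ('\xef\xbb\xbf' is the UTF-8 encoding of the byte order mark U+FEFF,
+    # which VerifyRow strips from the 'date' column.)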
+ event_object = event_objects[0] + + self.assertEquals(event_object.timestamp, 1380292946000000) + + # Test this entry: + # 9/27/2013 2:42:26 PM Blocked by Access Protection rule + # SOMEDOMAIN\someUser C:\Windows\System32\procexp64.exe C:\Program Files + # (x86)\McAfee\Common Framework\UdaterUI.exe Common Standard + # Protection:Prevent termination of McAfee processes Action blocked : + # Terminate + + event_object = event_objects[1] + + self.assertEquals(event_object.timestamp, 1380292959000000) + self.assertEquals(event_object.username, u'SOMEDOMAIN\\someUser') + self.assertEquals( + event_object.full_path, u'C:\\Windows\\System32\\procexp64.exe') + + expected_msg = ( + u'File Name: C:\\Windows\\System32\\procexp64.exe ' + u'User: SOMEDOMAIN\\someUser ' + u'C:\\Program Files (x86)\\McAfee\\Common Framework\\Frame' + u'workService.exe ' + u'Blocked by Access Protection rule ' + u'Common Standard Protection:Prevent termination of McAfee processes ' + u'Action blocked : Terminate') + expected_msg_short = ( + u'C:\\Windows\\System32\\procexp64.exe ' + u'Action blocked : Terminate') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/msiecf.py b/plaso/parsers/msiecf.py new file mode 100644 index 0000000..45224cb --- /dev/null +++ b/plaso/parsers/msiecf.py @@ -0,0 +1,233 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Microsoft Internet Explorer (MSIE) Cache Files (CF).""" + +import logging + +import pymsiecf + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +if pymsiecf.get_version() < '20130317': + raise ImportWarning(u'MsiecfParser requires at least pymsiecf 20130317.') + + +class MsiecfUrlEvent(time_events.TimestampEvent): + """Convenience class for an MSIECF URL event.""" + + DATA_TYPE = 'msiecf:url' + + def __init__( + self, timestamp, timestamp_description, msiecf_item, recovered=False): + """Initializes the event. + + Args: + timestamp: The timestamp value. + timestamp_desc: The usage string describing the timestamp. + msiecf_item: The MSIECF item (pymsiecf.url). + recovered: Boolean value to indicate the item was recovered, False + by default. 
+ """ + super(MsiecfUrlEvent, self).__init__(timestamp, timestamp_description) + + self.recovered = recovered + self.offset = msiecf_item.offset + + self.url = msiecf_item.location + self.number_of_hits = msiecf_item.number_of_hits + self.cache_directory_index = msiecf_item.cache_directory_index + self.filename = msiecf_item.filename + self.cached_file_size = msiecf_item.cached_file_size + + if msiecf_item.type and msiecf_item.data: + if msiecf_item.type == u'cache': + if msiecf_item.data[:4] == 'HTTP': + self.http_headers = msiecf_item.data[:-1] + # TODO: parse data of other URL item type like history which requires + # OLE VT parsing. + + +class MsiecfParser(interface.BaseParser): + """Parses MSIE Cache Files (MSIECF).""" + + NAME = 'msiecf' + DESCRIPTION = u'Parser for MSIE Cache Files (MSIECF) also known as index.dat.' + + def _ParseUrl( + self, parser_context, msiecf_item, file_entry=None, parser_chain=None, + recovered=False): + """Extract data from a MSIE Cache Files (MSIECF) URL item. + + Every item is stored as an event object, one for each timestamp. + + Args: + parser_context: A parser context object (instance of ParserContext). + msiecf_item: An item (pymsiecf.url). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + recovered: Boolean value to indicate the item was recovered, False + by default. + """ + # The secondary timestamp can be stored in either UTC or local time + # this is dependent on what the index.dat file is used for. + # Either the file path of the location string can be used to distinguish + # between the different type of files. + primary_timestamp = timelib.Timestamp.FromFiletime( + msiecf_item.get_primary_time_as_integer()) + primary_timestamp_desc = 'Primary Time' + + # Need to convert the FILETIME to the internal timestamp here to + # do the from localtime conversion. + secondary_timestamp = timelib.Timestamp.FromFiletime( + msiecf_item.get_secondary_time_as_integer()) + secondary_timestamp_desc = 'Secondary Time' + + if msiecf_item.type: + if msiecf_item.type == u'cache': + primary_timestamp_desc = eventdata.EventTimestamp.ACCESS_TIME + secondary_timestamp_desc = eventdata.EventTimestamp.MODIFICATION_TIME + + elif msiecf_item.type == u'cookie': + primary_timestamp_desc = eventdata.EventTimestamp.ACCESS_TIME + secondary_timestamp_desc = eventdata.EventTimestamp.MODIFICATION_TIME + + elif msiecf_item.type == u'history': + primary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME + secondary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME + + elif msiecf_item.type == u'history-daily': + primary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME + secondary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME + # The secondary_timestamp is in localtime normalize it to be in UTC. + secondary_timestamp = timelib.Timestamp.LocaltimeToUTC( + secondary_timestamp, parser_context.timezone) + + elif msiecf_item.type == u'history-weekly': + primary_timestamp_desc = eventdata.EventTimestamp.CREATION_TIME + secondary_timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME + # The secondary_timestamp is in localtime normalize it to be in UTC. 
+ secondary_timestamp = timelib.Timestamp.LocaltimeToUTC( + secondary_timestamp, parser_context.timezone) + + event_object = MsiecfUrlEvent( + primary_timestamp, primary_timestamp_desc, msiecf_item, recovered) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if secondary_timestamp > 0: + event_object = MsiecfUrlEvent( + secondary_timestamp, secondary_timestamp_desc, msiecf_item, + recovered) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + expiration_timestamp = msiecf_item.get_expiration_time_as_integer() + if expiration_timestamp > 0: + # The expiration time in MSIECF version 4.7 is stored as a FILETIME value + # in version 5.2 it is stored as a FAT date time value. + # Since the as_integer function returns the raw integer value we need to + # apply the right conversion here. + if self.version == u'4.7': + event_object = MsiecfUrlEvent( + timelib.Timestamp.FromFiletime(expiration_timestamp), + eventdata.EventTimestamp.EXPIRATION_TIME, msiecf_item, recovered) + else: + event_object = MsiecfUrlEvent( + timelib.Timestamp.FromFatDateTime(expiration_timestamp), + eventdata.EventTimestamp.EXPIRATION_TIME, msiecf_item, recovered) + + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + last_checked_timestamp = msiecf_item.get_last_checked_time_as_integer() + if last_checked_timestamp > 0: + event_object = MsiecfUrlEvent( + timelib.Timestamp.FromFatDateTime(last_checked_timestamp), + eventdata.EventTimestamp.LAST_CHECKED_TIME, msiecf_item, recovered) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a MSIE Cache File (MSIECF). + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + msiecf_file = pymsiecf.file() + msiecf_file.set_ascii_codepage(parser_context.codepage) + + try: + msiecf_file.open_file_object(file_object) + + self.version = msiecf_file.format_version + except IOError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + for item_index in range(0, msiecf_file.number_of_items): + try: + msiecf_item = msiecf_file.get_item(item_index) + if isinstance(msiecf_item, pymsiecf.url): + self._ParseUrl( + parser_context, msiecf_item, file_entry=file_entry, + parser_chain=parser_chain) + + # TODO: implement support for pymsiecf.leak, pymsiecf.redirected, + # pymsiecf.item. 
+ except IOError as exception: + logging.warning( + u'[{0:s}] unable to parse item: {1:d} in file: {2:s}: {3:s}'.format( + self.NAME, item_index, file_entry.name, exception)) + + for item_index in range(0, msiecf_file.number_of_recovered_items): + try: + msiecf_item = msiecf_file.get_recovered_item(item_index) + if isinstance(msiecf_item, pymsiecf.url): + self._ParseUrl( + parser_context, msiecf_item, file_entry=file_entry, + parser_chain=parser_chain, recovered=True) + + # TODO: implement support for pymsiecf.leak, pymsiecf.redirected, + # pymsiecf.item. + except IOError as exception: + logging.info(( + u'[{0:s}] unable to parse recovered item: {1:d} in file: {2:s}: ' + u'{3:s}').format( + self.NAME, item_index, file_entry.name, exception)) + + file_object.close() + + +manager.ParsersManager.RegisterParser(MsiecfParser) diff --git a/plaso/parsers/msiecf_test.py b/plaso/parsers/msiecf_test.py new file mode 100644 index 0000000..00f8e07 --- /dev/null +++ b/plaso/parsers/msiecf_test.py @@ -0,0 +1,113 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Microsoft Internet Explorer (MSIE) Cache Files (CF) parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import msiecf as msiecf_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import msiecf +from plaso.parsers import test_lib + + +class MsiecfParserTest(test_lib.ParserTestCase): + """Tests for the MSIE Cache Files (MSIECF) parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = msiecf.MsiecfParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['index.dat']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # MSIE Cache File information: + # File size: 32768 bytes + # Number of items: 7 + # Number of recovered items: 11 + # 7 + 11 records, each with 4 records. 
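+    # That is (7 + 11) URL records, each of which yields 4 event objects in
+    # this test file: one each for the primary, secondary, expiration and
+    # last checked timestamps.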
+ + self.assertEquals(len(event_objects), (7 + 11) * 4) + + # Record type : URL + # Offset range : 21376 - 21632 (256) + # Location : Visited: testing@http://www.trafficfusionx.com + # /download/tfscrn2/funnycats.exe + # Primary time : Jun 23, 2011 18:02:10.066000000 + # Secondary time : Jun 23, 2011 18:02:10.066000000 + # Expiration time : Jun 29, 2011 17:55:02 + # Last checked time : Jun 23, 2011 18:02:12 + # Cache directory index : -2 (0xfe) + + event_object = event_objects[8] + expected_location = ( + u'Visited: testing@http://www.trafficfusionx.com/download/tfscrn2' + u'/funnycats.exe') + + self.assertEquals(event_object.offset, 21376) + self.assertEquals(event_object.url, expected_location) + self.assertEquals(event_object.cache_directory_index, -2) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-06-23 18:02:10.066') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_VISITED_TIME) + + event_object = event_objects[9] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-06-23 18:02:10.066') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_VISITED_TIME) + + event_object = event_objects[10] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-06-29 17:55:02') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.EXPIRATION_TIME) + + event_object = event_objects[11] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-06-23 18:02:12') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_CHECKED_TIME) + + expected_msg = ( + u'Location: Visited: testing@http://www.trafficfusionx.com/download' + u'/tfscrn2/funnycats.exe ' + u'Number of hits: 6 ' + u'Cached file size: 0') + expected_msg_short = ( + u'Location: Visited: testing@http://www.trafficfusionx.com/download' + u'/tfscrn2/fun...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/olecf.py b/plaso/parsers/olecf.py new file mode 100644 index 0000000..be553c2 --- /dev/null +++ b/plaso/parsers/olecf.py @@ -0,0 +1,109 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
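The expected timestamp values asserted above are plaso's internal microseconds since the POSIX epoch; for the FILETIME-backed MSIECF values the conversion amounts to the arithmetic below (a sketch of the math only, not plaso's timelib implementation):

# Seconds between the FILETIME epoch (1601-01-01) and the POSIX epoch.
FILETIME_TO_POSIX_BASE = 11644473600

def FiletimeToPosixMicroseconds(filetime):
  """Converts 100ns intervals since 1601 to microseconds since 1970."""
  return filetime // 10 - FILETIME_TO_POSIX_BASE * 1000000

# The POSIX epoch itself, expressed as a FILETIME value.
assert FiletimeToPosixMicroseconds(116444736000000000) == 0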
+"""Parser for OLE Compound Files (OLECF).""" + +import logging + +import pyolecf + +from plaso.lib import errors +from plaso.parsers import interface +from plaso.parsers import manager + + +if pyolecf.get_version() < '20131012': + raise ImportWarning('OleCfParser requires at least pyolecf 20131012.') + + +class OleCfParser(interface.BasePluginsParser): + """Parses OLE Compound Files (OLECF).""" + + NAME = 'olecf' + DESCRIPTION = u'Parser for OLE Compound Files (OLECF).' + + _plugin_classes = {} + + def __init__(self): + """Initializes a parser object.""" + super(OleCfParser, self).__init__() + self._plugins = OleCfParser.GetPluginObjects() + + for list_index, plugin_object in enumerate(self._plugins): + if plugin_object.NAME == 'olecf_default': + self._default_plugin = self._plugins.pop(list_index) + break + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extracts data from an OLE Compound File (OLECF). + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. + """ + file_object = file_entry.GetFileObject() + olecf_file = pyolecf.file() + olecf_file.set_ascii_codepage(parser_context.codepage) + + try: + olecf_file.open_file_object(file_object) + except IOError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s}: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + # Get a list of all root items from the OLE CF file. + root_item = olecf_file.root_item + item_names = [item.name for item in root_item.sub_items] + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + # Compare the list of available plugins. + # We will try to use every plugin against the file (except + # the default plugin) and run it. Only if none of the plugins + # works will we use the default plugin. + parsed = False + for plugin_object in self._plugins: + try: + plugin_object.Process( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + root_item=root_item, item_names=item_names) + + except errors.WrongPlugin: + logging.debug( + u'[{0:s}] plugin: {1:s} cannot parse the OLECF file: {2:s}'.format( + self.NAME, plugin_object.NAME, file_entry.name)) + + # Check if we still haven't parsed the file, and if so we will use + # the default OLECF plugin. + if not parsed and self._default_plugin: + self._default_plugin.Process( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + root_item=root_item, item_names=item_names) + + olecf_file.close() + file_object.close() + + +manager.ParsersManager.RegisterParser(OleCfParser) diff --git a/plaso/parsers/olecf_plugins/__init__.py b/plaso/parsers/olecf_plugins/__init__.py new file mode 100644 index 0000000..cf95c3e --- /dev/null +++ b/plaso/parsers/olecf_plugins/__init__.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each OLECF plugin.""" + +from plaso.parsers.olecf_plugins import automatic_destinations +from plaso.parsers.olecf_plugins import default +from plaso.parsers.olecf_plugins import summary diff --git a/plaso/parsers/olecf_plugins/automatic_destinations.py b/plaso/parsers/olecf_plugins/automatic_destinations.py new file mode 100644 index 0000000..f109b77 --- /dev/null +++ b/plaso/parsers/olecf_plugins/automatic_destinations.py @@ -0,0 +1,204 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plugin to parse .automaticDestinations-ms OLECF files.""" + +import logging +import re + +import construct + +from plaso.events import time_events +from plaso.lib import binary +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import olecf +from plaso.parsers import winlnk +from plaso.parsers.olecf_plugins import interface + + +class AutomaticDestinationsDestListEntryEvent(time_events.FiletimeEvent): + """Convenience class for an .automaticDestinations-ms DestList entry event.""" + + DATA_TYPE = 'olecf:dest_list:entry' + + def __init__( + self, timestamp, timestamp_description, entry_offset, dest_list_entry): + """Initializes the event object. + + Args: + timestamp: The FILETIME value for the timestamp. + timestamp_description: The usage string for the timestamp value. + entry_offset: The offset of the DestList entry relative to the start of + the DestList stream. + dest_list_entry: The DestList entry (instance of construct.Struct). 
+ """ + super(AutomaticDestinationsDestListEntryEvent, self).__init__( + timestamp, timestamp_description) + + self.offset = entry_offset + self.entry_number = dest_list_entry.entry_number + + self.hostname = binary.ByteStreamCopyToString( + dest_list_entry.hostname, codepage='ascii') + self.path = binary.Ut16StreamCopyToString(dest_list_entry.path) + self.pin_status = dest_list_entry.pin_status + + self.droid_volume_identifier = binary.ByteStreamCopyToGuid( + dest_list_entry.droid_volume_identifier) + self.droid_file_identifier = binary.ByteStreamCopyToGuid( + dest_list_entry.droid_file_identifier) + self.birth_droid_volume_identifier = binary.ByteStreamCopyToGuid( + dest_list_entry.birth_droid_volume_identifier) + self.birth_droid_file_identifier = binary.ByteStreamCopyToGuid( + dest_list_entry.birth_droid_file_identifier) + + +class AutomaticDestinationsOlecfPlugin(interface.OlecfPlugin): + """Plugin that parses an .automaticDestinations-ms OLECF file.""" + + NAME = 'olecf_automatic_destinations' + DESCRIPTION = u'Parser for *.automaticDestinations-ms OLECF files.' + + REQUIRED_ITEMS = frozenset([u'DestList']) + + _RE_LNK_ITEM_NAME = re.compile(r'^[1-9a-f][0-9a-f]*$') + + # We cannot use the parser registry here since winlnk could be disabled. + # TODO: see if there is a more elegant solution for this. + _WINLNK_PARSER = winlnk.WinLnkParser() + + _DEST_LIST_STREAM_HEADER = construct.Struct( + 'dest_list_stream_header', + construct.ULInt32('unknown1'), + construct.ULInt32('number_of_entries'), + construct.ULInt32('number_of_pinned_entries'), + construct.LFloat32('unknown2'), + construct.ULInt32('last_entry_number'), + construct.Padding(4), + construct.ULInt32('last_revision_number'), + construct.Padding(4)) + + _DEST_LIST_STREAM_HEADER_SIZE = _DEST_LIST_STREAM_HEADER.sizeof() + + # Using Construct's utf-16 encoding here will create strings with their + # end-of-string characters exposed. Instead the strings are read as + # binary strings and converted using ReadUtf16(). + _DEST_LIST_STREAM_ENTRY = construct.Struct( + 'dest_list_stream_entry', + construct.ULInt64('unknown1'), + construct.Array(16, construct.Byte('droid_volume_identifier')), + construct.Array(16, construct.Byte('droid_file_identifier')), + construct.Array(16, construct.Byte('birth_droid_volume_identifier')), + construct.Array(16, construct.Byte('birth_droid_file_identifier')), + construct.String('hostname', 16), + construct.ULInt32('entry_number'), + construct.ULInt32('unknown2'), + construct.LFloat32('unknown3'), + construct.ULInt64('last_modification_time'), + construct.ULInt32('pin_status'), + construct.ULInt16('path_size'), + construct.String('path', lambda ctx: ctx.path_size * 2)) + + def ParseDestList( + self, parser_context, file_entry=None, parser_chain=None, + olecf_item=None): + """Parses the DestList OLECF item. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + olecf_item: An optional OLECF item (instance of pyolecf.item). + """ + if not olecf_item: + return + + try: + header = self._DEST_LIST_STREAM_HEADER.parse_stream(olecf_item) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile( + u'Unable to parse DestList header with error: {0:s}'.format( + exception)) + + if header.unknown1 != 1: + # TODO: add format debugging notes to parser context. 
+ logging.debug(u'[{0:s}] unknown1 value: {1:d}.'.format( + self.NAME, header.unknown1)) + + entry_offset = olecf_item.get_offset() + while entry_offset < olecf_item.size: + try: + entry = self._DEST_LIST_STREAM_ENTRY.parse_stream(olecf_item) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile( + u'Unable to parse DestList entry with error: {0:s}'.format( + exception)) + + if not entry: + break + + event_object = AutomaticDestinationsDestListEntryEvent( + entry.last_modification_time, + eventdata.EventTimestamp.MODIFICATION_TIME, entry_offset, entry) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + entry_offset = olecf_item.get_offset() + + def ParseItems( + self, parser_context, file_entry=None, parser_chain=None, root_item=None, + **unused_kwargs): + """Parses OLECF items. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + root_item: Optional root item of the OLECF file. The default is None. + + Raises: + ValueError: If the root_item is not set. + """ + if root_item is None: + raise ValueError(u'Root item not set.') + + for item in root_item.sub_items: + if item.name == u'DestList': + self.ParseDestList( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + olecf_item=item) + + elif self._RE_LNK_ITEM_NAME.match(item.name): + if file_entry: + display_name = u'{0:s} # {1:s}'.format( + parser_context.GetDisplayName(file_entry), item.name) + else: + display_name = u'# {0:s}'.format(item.name) + + self._WINLNK_PARSER.ParseFileObject( + parser_context, item, file_entry=file_entry, + parser_chain=parser_chain, display_name=display_name) + + # TODO: check for trailing data? + + +olecf.OleCfParser.RegisterPlugin(AutomaticDestinationsOlecfPlugin) diff --git a/plaso/parsers/olecf_plugins/automatic_destinations_test.py b/plaso/parsers/olecf_plugins/automatic_destinations_test.py new file mode 100644 index 0000000..ae93800 --- /dev/null +++ b/plaso/parsers/olecf_plugins/automatic_destinations_test.py @@ -0,0 +1,101 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
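ParseDestList above steps through the stream by re-reading the item's offset after each construct parse. The same pattern in miniature, substituting io.BytesIO for the pyolecf item and a toy two-field record for the DestList entry (illustrative only):

import io
import struct

import construct

TOY_ENTRY = construct.Struct(
    'toy_entry',
    construct.ULInt32('entry_number'),
    construct.ULInt64('last_modification_time'))

stream = io.BytesIO(
    struct.pack('<IQ', 1, 129882011250000000) +
    struct.pack('<IQ', 2, 129882011260000000))
stream_size = 2 * TOY_ENTRY.sizeof()

entry_offset = stream.tell()
while entry_offset < stream_size:
  entry = TOY_ENTRY.parse_stream(stream)
  print entry.entry_number, entry.last_modification_time
  entry_offset = stream.tell()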
+"""Tests for the .automaticDestinations-ms OLECF file plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import olecf as olecf_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers.olecf_plugins import automatic_destinations +from plaso.parsers.olecf_plugins import test_lib + + +class TestAutomaticDestinationsOlecfPlugin(test_lib.OleCfPluginTestCase): + """Tests for the .automaticDestinations-ms OLECF file plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = automatic_destinations.AutomaticDestinationsOlecfPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath([ + u'1b4dd67f29cb1962.automaticDestinations-ms']) + event_queue_consumer = self._ParseOleCfFileWithPlugin( + test_file, self._plugin) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 44) + + # Check a AutomaticDestinationsDestListEntryEvent. + event_object = event_objects[3] + + self.assertEquals(event_object.offset, 32) + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.MODIFICATION_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-01 13:52:38.997538') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Entry: 11 ' + u'Pin status: Unpinned ' + u'Hostname: wks-win764bitb ' + u'Path: C:\\Users\\nfury\\Pictures\\The SHIELD ' + u'Droid volume identifier: {cf6619c2-66a8-44a6-8849-1582fcd3a338} ' + u'Droid file identifier: {63eea867-7b85-11e1-8950-005056a50b40} ' + u'Birth droid volume identifier: ' + u'{cf6619c2-66a8-44a6-8849-1582fcd3a338} ' + u'Birth droid file identifier: {63eea867-7b85-11e1-8950-005056a50b40}') + + expected_msg_short = ( + u'Entry: 11 ' + u'Pin status: Unpinned ' + u'Path: C:\\Users\\nfury\\Pictures\\The SHIELD') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # Check a WinLnkLinkEvent. + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-11-10 07:51:16.749125') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'File size: 3545 ' + u'File attribute flags: 0x00002020 ' + u'Drive type: 3 ' + u'Drive serial number: 0x24ba718b ' + u'Local path: C:\\Users\\nfury\\AppData\\Roaming\\Microsoft\\Windows\\' + u'Libraries\\Documents.library-ms ' + u'Link target: [Users Libraries, UNKNOWN: 0x00]') + + expected_msg_short = ( + u'C:\\Users\\nfury\\AppData\\Roaming\\Microsoft\\Windows\\Libraries\\' + u'Documents.library-ms') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/olecf_plugins/default.py b/plaso/parsers/olecf_plugins/default.py new file mode 100644 index 0000000..0fbd8a9 --- /dev/null +++ b/plaso/parsers/olecf_plugins/default.py @@ -0,0 +1,162 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The default plugin for parsing OLE Compound Files (OLECF).""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import olecf +from plaso.parsers.olecf_plugins import interface + + +class OleCfItemEvent(time_events.FiletimeEvent): + """Convenience class for an OLECF item event.""" + + DATA_TYPE = 'olecf:item' + + def __init__(self, timestamp, usage, olecf_item): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + usage: A string describing the timestamp value. + olecf_item: The OLECF item (pyolecf.item). + """ + super(OleCfItemEvent, self).__init__(timestamp, usage) + + # TODO: need a better way to express the original location of the + # original data. + self.offset = 0 + + self.name = olecf_item.name + # TODO: have pyolecf return the item type here. + # self.type = olecf_item.type + self.size = olecf_item.size + + +class DefaultOleCFPlugin(interface.OlecfPlugin): + """Class to define the default OLECF file plugin.""" + + NAME = 'olecf_default' + DESCRIPTION = u'Parser for a generic OLECF item.' + + def _ParseItem( + self, parser_context, file_entry=None, parser_chain=None, + olecf_item=None): + """Parses an OLECF item. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + olecf_item: An optional OLECF item (instance of pyolecf.item). + + Returns: + A boolean value indicating if an event object was produced. + """ + event_object = None + result = False + + creation_time, modification_time = self.GetTimestamps(olecf_item) + + if creation_time: + event_object = OleCfItemEvent( + creation_time, eventdata.EventTimestamp.CREATION_TIME, + olecf_item) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if modification_time: + event_object = OleCfItemEvent( + modification_time, eventdata.EventTimestamp.MODIFICATION_TIME, + olecf_item) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if event_object: + result = True + + for sub_item in olecf_item.sub_items: + if self._ParseItem( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + olecf_item=sub_item): + result = True + + return result + + def ParseItems( + self, parser_context, file_entry=None, parser_chain=None, root_item=None, + **unused_kwargs): + """Parses OLECF items. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + root_item: Optional root item of the OLECF file. The default is None. + """ + if not self._ParseItem( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + olecf_item=root_item): + # If no event object was produced, produce at least one for + # the root item. 
+ event_object = OleCfItemEvent( + 0, eventdata.EventTimestamp.CREATION_TIME, root_item) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def Process( + self, parser_context, file_entry=None, parser_chain=None, root_item=None, + item_names=None, **kwargs): + """Determine if this is the right plugin for this OLECF file. + + This function takes a list of sub items found in the root of a + OLECF file and compares that to a list of required items defined + in this plugin. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + root_item: Optional root item of the OLECF file. The default is None. + item_names: Optional list of all items discovered in the root. + The default is None. + + Raises: + errors.WrongPlugin: If the set of required items is not a subset + of the available items. + ValueError: If the root_item or items are not set. + """ + if root_item is None or item_names is None: + raise ValueError(u'Root item or items are not set.') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + self.ParseItems( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + root_item=root_item) + + +olecf.OleCfParser.RegisterPlugin(DefaultOleCFPlugin) diff --git a/plaso/parsers/olecf_plugins/default_test.py b/plaso/parsers/olecf_plugins/default_test.py new file mode 100644 index 0000000..97b0c42 --- /dev/null +++ b/plaso/parsers/olecf_plugins/default_test.py @@ -0,0 +1,75 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the OLE Compound File (OLECF) default plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import olecf as olecf_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers.olecf_plugins import default +from plaso.parsers.olecf_plugins import test_lib + + +class TestDefaultPluginOleCf(test_lib.OleCfPluginTestCase): + """Tests for the OLECF default plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = default.DefaultOleCFPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['Document.doc']) + event_queue_consumer = self._ParseOleCfFileWithPlugin( + test_file, self._plugin) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + # Check the Root Entry event. 
+    event_object = event_objects[0]
+
+    self.assertEquals(event_object.name, u'Root Entry')
+
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.MODIFICATION_TIME)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-05-16 02:29:49.795')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_string = (
+        u'Name: Root Entry')
+
+    self._TestGetMessageStrings(event_object, expected_string, expected_string)
+
+    # Check one other entry.
+    event_object = event_objects[1]
+
+    expected_string = u'Name: MsoDataStore'
+    self._TestGetMessageStrings(event_object, expected_string, expected_string)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-05-16 02:29:49.704')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/olecf_plugins/interface.py b/plaso/parsers/olecf_plugins/interface.py
new file mode 100644
index 0000000..a93ac10
--- /dev/null
+++ b/plaso/parsers/olecf_plugins/interface.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the necessary interface for OLECF plugins."""
+
+import abc
+import logging
+
+from plaso.lib import errors
+from plaso.parsers import plugins
+
+
+class OlecfPlugin(plugins.BasePlugin):
+  """An OLECF plugin for Plaso."""
+
+  NAME = 'olecf'
+
+  # List of items that should be present in the OLECF file, for verification.
+  REQUIRED_ITEMS = frozenset([])
+
+  def GetTimestamps(self, olecf_item):
+    """Takes an OLECF item and returns extracted timestamps.
+
+    Args:
+      olecf_item: An OLECF item (instance of pyolecf.item).
+
+    Returns:
+      A tuple of two timestamps: created and modified.
+    """
+    if not olecf_item:
+      return None, None
+
+    try:
+      creation_time = olecf_item.get_creation_time_as_integer()
+    except OverflowError as exception:
+      logging.warning(
+          u'Unable to read the creation time with error: {0:s}'.format(
+              exception))
+      creation_time = 0
+
+    try:
+      modification_time = olecf_item.get_modification_time_as_integer()
+    except OverflowError as exception:
+      logging.warning(
+          u'Unable to read the modification time with error: {0:s}'.format(
+              exception))
+      modification_time = 0
+
+    # If no useful timestamps, return early.
+    if not creation_time and not modification_time:
+      return None, None
+
+    # Office template documents sometimes contain a creation time
+    # of -1 (0xffffffffffffffff).
+    if creation_time == 0xffffffffffffffffL:
+      creation_time = 0
+
+    return creation_time, modification_time
+
+  @abc.abstractmethod
+  def ParseItems(
+      self, parser_context, file_entry=None, parser_chain=None, root_item=None,
+      items=None, **kwargs):
+    """Parses OLECF items.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      root_item: Optional root item of the OLECF file. The default is None.
+      items: Optional list of items (instances of pyolecf.item) of the OLECF
+             file. The default is None.
+    """
+
+  def Process(
+      self, parser_context, file_entry=None, parser_chain=None, root_item=None,
+      item_names=None, **kwargs):
+    """Determine if this is the right plugin for this OLECF file.
+
+    This function takes a list of sub items found in the root of an
+    OLECF file and compares that to a list of required items defined
+    in this plugin.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      root_item: Optional root item of the OLECF file. The default is None.
+      item_names: Optional list of all items discovered in the root.
+                  The default is None.
+
+    Raises:
+      errors.WrongPlugin: If the set of required items is not a subset
+                          of the available items.
+      ValueError: If the root_item or item_names are not set.
+    """
+    if root_item is None or item_names is None:
+      raise ValueError(u'Root item or items are not set.')
+
+    if not frozenset(item_names) >= self.REQUIRED_ITEMS:
+      raise errors.WrongPlugin(
+          u'Not the correct items for: {0:s}'.format(self.NAME))
+
+    # This will raise if unhandled keyword arguments are passed.
+    super(OlecfPlugin, self).Process(parser_context, **kwargs)
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    items = []
+    for item_string in self.REQUIRED_ITEMS:
+      item = root_item.get_sub_item_by_name(item_string)
+
+      if item:
+        items.append(item)
+
+    self.ParseItems(
+        parser_context, file_entry=file_entry, parser_chain=parser_chain,
+        root_item=root_item, items=items)
+
+
+class OleDefinitions(object):
+  """Convenience class for OLE definitions."""
+
+  VT_I2 = 0x0002
+  VT_I4 = 0x0003
+  VT_BOOL = 0x000b
+  VT_LPSTR = 0x001e
+  VT_LPWSTR = 0x001f
+  VT_FILETIME = 0x0040
+  VT_CF = 0x0047
diff --git a/plaso/parsers/olecf_plugins/summary.py b/plaso/parsers/olecf_plugins/summary.py
new file mode 100644
index 0000000..d1b7e62
--- /dev/null
+++ b/plaso/parsers/olecf_plugins/summary.py
@@ -0,0 +1,433 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
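+# The plugins in this module follow the OlecfPlugin contract defined in
+# interface.py. As a rough sketch, a minimal (hypothetical) plugin only
+# declares its required items and implements ParseItems(); all names below
+# are illustrative, not part of plaso:
+#
+#   class MyStreamOlecfPlugin(interface.OlecfPlugin):
+#     NAME = 'olecf_my_stream'
+#     REQUIRED_ITEMS = frozenset([u'MyStream'])
+#
+#     def ParseItems(
+#         self, parser_context, file_entry=None, parser_chain=None,
+#         root_item=None, items=None, **unused_kwargs):
+#       creation_time, _ = self.GetTimestamps(root_item)
+#       # Build event objects from the items that Process() resolved from
+#       # REQUIRED_ITEMS and hand them to parser_context.ProduceEvent().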
+"""Plugin to parse the OLECF summary/document summary information items.""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import olecf +from plaso.parsers.olecf_plugins import interface + + +class OleCfSummaryInfoEvent(time_events.FiletimeEvent): + """Convenience class for an OLECF Summary info event.""" + + DATA_TYPE = 'olecf:summary_info' + + def __init__(self, timestamp, usage, attributes): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + usage: The usage string, describing the timestamp value. + attributes: A dict object containing all extracted attributes. + """ + super(OleCfSummaryInfoEvent, self).__init__( + timestamp, usage) + + self.name = u'Summary Information' + + for attribute_name, attribute_value in attributes.iteritems(): + setattr(self, attribute_name, attribute_value) + + +# TODO: Move this class to a higher level (to the interface) +# so the these functions can be shared by other plugins. +class OleCfSummaryInfo(object): + """An OLECF Summary Info object.""" + + _CLASS_IDENTIFIER = 'f29f85e0-4ff9-1068-ab91-08002b27b3d9' + + _PROPERTY_NAMES_INT32 = { + 0x000e: 'number_of_pages', # PIDSI_PAGECOUNT + 0x000f: 'number_of_words', # PIDSI_WORDCOUNT + 0x0010: 'number_of_characters', # PIDSI_CHARCOUNT + 0x0013: 'security', # PIDSI_SECURITY + } + + _PROPERTY_NAMES_STRING = { + 0x0002: 'title', # PIDSI_TITLE + 0x0003: 'subject', # PIDSI_SUBJECT + 0x0004: 'author', # PIDSI_AUTHOR + 0x0005: 'keywords', # PIDSI_KEYWORDS + 0x0006: 'comments', # PIDSI_COMMENTS + 0x0007: 'template', # PIDSI_TEMPLATE + 0x0008: 'last_saved_by', # PIDSI_LASTAUTHOR + 0x0009: 'revision_number', # PIDSI_REVNUMBER + 0x0012: 'application', # PIDSI_APPNAME + } + + PIDSI_CODEPAGE = 0x0001 + PIDSI_EDITTIME = 0x000a + PIDSI_LASTPRINTED = 0x000b + PIDSI_CREATE_DTM = 0x000c + PIDSI_LASTSAVE_DTM = 0x000d + PIDSI_THUMBNAIL = 0x0011 + + def __init__(self, olecf_item): + """Initialize the OLECF summary object. + + Args: + olecf_item: The OLECF item (instance of pyolecf.property_set_stream). + """ + super(OleCfSummaryInfo, self).__init__() + self.attributes = {} + self.events = [] + + self._InitFromPropertySet(olecf_item.set) + + def _InitFromPropertySet(self, property_set): + """Initializes the object from a property set. + + Args: + property_set: The OLECF property set (pyolecf.property_set). + """ + # Combine the values of multiple property sections + # but do not override properties that are already set. + for property_section in property_set.sections: + if property_section.class_identifier != self._CLASS_IDENTIFIER: + continue + for property_value in property_section.properties: + self._InitFromPropertyValue(property_value) + + def _InitFromPropertyValue(self, property_value): + """Initializes the object from a property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value). 
+ """ + if property_value.type == interface.OleDefinitions.VT_I2: + self._InitFromPropertyValueTypeInt16(property_value) + + elif property_value.type == interface.OleDefinitions.VT_I4: + self._InitFromPropertyValueTypeInt32(property_value) + + elif (property_value.type == interface.OleDefinitions.VT_LPSTR or + property_value.type == interface.OleDefinitions.VT_LPWSTR): + self._InitFromPropertyValueTypeString(property_value) + + elif property_value.type == interface.OleDefinitions.VT_FILETIME: + self._InitFromPropertyValueTypeFiletime(property_value) + + def _InitFromPropertyValueTypeInt16(self, property_value): + """Initializes the object from a 16-bit int type property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value + of type VT_I2). + """ + if property_value.identifier == self.PIDSI_CODEPAGE: + # TODO: can the codepage vary per property section? + # And is it needed to interpret the ASCII strings? + # codepage = property_value.data_as_integer + pass + + def _InitFromPropertyValueTypeInt32(self, property_value): + """Initializes the object from a 32-bit int type property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value + of type VT_I4). + """ + property_name = self._PROPERTY_NAMES_INT32.get( + property_value.identifier, None) + + if property_name and not property_name in self.attributes: + self.attributes[property_name] = property_value.data_as_integer + + def _InitFromPropertyValueTypeString(self, property_value): + """Initializes the object from a string type property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value + of type VT_LPSTR or VT_LPWSTR). + """ + property_name = self._PROPERTY_NAMES_STRING.get( + property_value.identifier, None) + + if property_name and not property_name in self.attributes: + self.attributes[property_name] = property_value.data_as_string + + def _InitFromPropertyValueTypeFiletime(self, property_value): + """Initializes the object from a filetime type property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value + of type VT_FILETIME). + """ + if property_value.identifier == self.PIDSI_LASTPRINTED: + self.events.append( + (property_value.data_as_integer, 'Document Last Printed Time')) + + elif property_value.identifier == self.PIDSI_CREATE_DTM: + self.events.append( + (property_value.data_as_integer, 'Document Creation Time')) + + elif property_value.identifier == self.PIDSI_LASTSAVE_DTM: + self.events.append( + (property_value.data_as_integer, 'Document Last Save Time')) + + elif property_value.identifier == self.PIDSI_EDITTIME: + # property_name = 'total_edit_time' + # TODO: handle duration. 
+ pass + + +class OleCfDocumentSummaryInfoEvent(time_events.FiletimeEvent): + """Convenience class for an OLECF Document Summary info event.""" + + DATA_TYPE = 'olecf:document_summary_info' + + _CLASS_IDENTIFIER = 'd5cdd502-2e9c-101b-9397-08002b2cf9ae' + + _PROPERTY_NAMES_BOOL = { + 0x0013: 'shared_document', # PIDDSI_SHAREDDOC + } + + _PROPERTY_NAMES_INT32 = { + 0x0004: 'number_of_bytes', # PIDDSI_BYTECOUNT + 0x0005: 'number_of_lines', # PIDDSI_LINECOUNT + 0x0006: 'number_of_paragraphs', # PIDDSI_PARCOUNT + 0x0007: 'number_of_slides', # PIDDSI_SLIDECOUNT + 0x0008: 'number_of_notes', # PIDDSI_NOTECOUNT + 0x0009: 'number_of_hidden_slides', # PIDDSI_HIDDENCOUNT + 0x000a: 'number_of_clips', # PIDDSI_MMCLIPCOUNT + 0x0011: 'number_of_characters_with_white_space', # PIDDSI_CCHWITHSPACES + 0x0017: 'application_version', # PIDDSI_VERSION + } + + _PROPERTY_NAMES_STRING = { + 0x000e: 'manager', # PIDDSI_MANAGER + 0x000f: 'company', # PIDDSI_COMPANY + 0x001a: 'content_type', # PIDDSI_CONTENTTYPE + 0x001b: 'content_status', # PIDDSI_CONTENTSTATUS + 0x001c: 'language', # PIDDSI_LANGUAGE + 0x001d: 'document_version', # PIDDSI_DOCVERSION + } + + PIDDSI_CODEPAGE = 0x0001 + PIDDSI_CATEGORY = 0x0002 + PIDDSI_PRESFORMAT = 0x0003 + PIDDSI_SCALE = 0x000b + PIDDSI_HEADINGPAIR = 0x000c + PIDDSI_DOCPARTS = 0x000d + PIDDSI_LINKSDIRTY = 0x0010 + PIDDSI_VERSION = 0x0017 + + def __init__(self, timestamp, usage, olecf_item): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + usage: The usage string, describing the timestamp value. + olecf_item: The OLECF item (pyolecf.property_set_stream). + """ + super(OleCfDocumentSummaryInfoEvent, self).__init__( + timestamp, usage) + + self.name = u'Document Summary Information' + + self._InitFromPropertySet(olecf_item.set) + + def _InitFromPropertySet(self, property_set): + """Initializes the event from a property set. + + Args: + property_set: The OLECF property set (pyolecf.property_set). + """ + # Combine the values of multiple property sections + # but do not override properties that are already set. + for property_section in property_set.sections: + if property_section.class_identifier != self._CLASS_IDENTIFIER: + continue + for property_value in property_section.properties: + self._InitFromPropertyValue(property_value) + + def _InitFromPropertyValue(self, property_value): + """Initializes the event from a property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value). + """ + if property_value.type == interface.OleDefinitions.VT_I2: + self._InitFromPropertyValueTypeInt16(property_value) + + elif property_value.type == interface.OleDefinitions.VT_I4: + self._InitFromPropertyValueTypeInt32(property_value) + + elif property_value.type == interface.OleDefinitions.VT_BOOL: + self._InitFromPropertyValueTypeBool(property_value) + + elif (property_value.type == interface.OleDefinitions.VT_LPSTR or + property_value.type == interface.OleDefinitions.VT_LPWSTR): + self._InitFromPropertyValueTypeString(property_value) + + def _InitFromPropertyValueTypeInt16(self, property_value): + """Initializes the event from a 16-bit int type property value. + + Args: + property_value: The OLECF property value (pyolecf.property_value + of type VT_I2). + """ + if property_value.identifier == self.PIDDSI_CODEPAGE: + # TODO: can the codepage vary per property section? + # And is it needed to interpret the ASCII strings? 
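+      # For what it is worth, the test harness in test_lib.py sets a fixed
+      # ASCII codepage on the pyolecf file object, e.g.
+      # olecf_file.set_ascii_codepage('cp1252'), so a per-section codepage
+      # would only matter for documents using a non-default encoding.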
+      # codepage = property_value.data_as_integer
+      pass
+
+  def _InitFromPropertyValueTypeInt32(self, property_value):
+    """Initializes the event from a 32-bit int type property value.
+
+    Args:
+      property_value: The OLECF property value (pyolecf.property_value
+                      of type VT_I4).
+    """
+    property_name = self._PROPERTY_NAMES_INT32.get(
+        property_value.identifier, None)
+
+    # The application version consists of 2 16-bit values that make up
+    # the version number, where the upper 16-bits are the major number
+    # and the lower 16-bits the minor number.
+    if property_value.identifier == self.PIDDSI_VERSION:
+      application_version = property_value.data_as_integer
+      setattr(self, property_name, u'{0:d}.{1:d}'.format(
+          application_version >> 16, application_version & 0xffff))
+
+    elif property_name and not hasattr(self, property_name):
+      setattr(self, property_name, property_value.data_as_integer)
+
+  def _InitFromPropertyValueTypeBool(self, property_value):
+    """Initializes the event from a boolean type property value.
+
+    Args:
+      property_value: The OLECF property value (pyolecf.property_value
+                      of type VT_BOOL).
+    """
+    property_name = self._PROPERTY_NAMES_BOOL.get(
+        property_value.identifier, None)
+
+    if property_name and not hasattr(self, property_name):
+      setattr(self, property_name, property_value.data_as_boolean)
+
+  def _InitFromPropertyValueTypeString(self, property_value):
+    """Initializes the event from a string type property value.
+
+    Args:
+      property_value: The OLECF property value (pyolecf.property_value
+                      of type VT_LPSTR or VT_LPWSTR).
+    """
+    property_name = self._PROPERTY_NAMES_STRING.get(
+        property_value.identifier, None)
+
+    if property_name and not hasattr(self, property_name):
+      setattr(self, property_name, property_value.data_as_string)
+
+
+class DocumentSummaryOlecfPlugin(interface.OlecfPlugin):
+  """Plugin that parses the DocumentSummaryInformation item from an OLECF file."""
+
+  NAME = 'olecf_document_summary'
+  DESCRIPTION = u'Parser for a DocumentSummaryInformation OLECF stream.'
+
+  # pylint: disable=anomalous-backslash-in-string
+  REQUIRED_ITEMS = frozenset([u'\005DocumentSummaryInformation'])
+
+  def ParseItems(
+      self, parser_context, file_entry=None, parser_chain=None, root_item=None,
+      items=None, **unused_kwargs):
+    """Parses a document summary information OLECF item.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      root_item: Optional root item of the OLECF file. The default is None.
+      items: Optional list of items (instances of pyolecf.item) of the OLECF
+             file. The default is None.
+    """
+    root_creation_time, root_modification_time = self.GetTimestamps(root_item)
+
+    for item in items:
+      if root_creation_time:
+        event_object = OleCfDocumentSummaryInfoEvent(
+            root_creation_time, eventdata.EventTimestamp.CREATION_TIME, item)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+      if root_modification_time:
+        event_object = OleCfDocumentSummaryInfoEvent(
+            root_modification_time, eventdata.EventTimestamp.MODIFICATION_TIME,
+            item)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+class SummaryInfoOlecfPlugin(interface.OlecfPlugin):
+  """Plugin that parses the SummaryInformation item from an OLECF file."""
+
+  NAME = 'olecf_summary'
+  DESCRIPTION = u'Parser for a SummaryInformation OLECF stream.'
+
+  # pylint: disable=anomalous-backslash-in-string
+  REQUIRED_ITEMS = frozenset([u'\005SummaryInformation'])
+
+  def ParseItems(
+      self, parser_context, file_entry=None, parser_chain=None, root_item=None,
+      items=None, **unused_kwargs):
+    """Parses a summary information OLECF item.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      root_item: Optional root item of the OLECF file. The default is None.
+      items: Optional list of items (instances of pyolecf.item) of the OLECF
+             file. The default is None.
+    """
+    root_creation_time, root_modification_time = self.GetTimestamps(root_item)
+
+    for item in items:
+      summary_information_object = OleCfSummaryInfo(item)
+
+      for timestamp, timestamp_description in summary_information_object.events:
+        event_object = OleCfSummaryInfoEvent(
+            timestamp, timestamp_description,
+            summary_information_object.attributes)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+      if root_creation_time:
+        event_object = OleCfSummaryInfoEvent(
+            root_creation_time, eventdata.EventTimestamp.CREATION_TIME,
+            summary_information_object.attributes)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+      if root_modification_time:
+        event_object = OleCfSummaryInfoEvent(
+            root_modification_time, eventdata.EventTimestamp.MODIFICATION_TIME,
+            summary_information_object.attributes)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+olecf.OleCfParser.RegisterPlugins(
+    [DocumentSummaryOlecfPlugin, SummaryInfoOlecfPlugin])
diff --git a/plaso/parsers/olecf_plugins/summary_test.py b/plaso/parsers/olecf_plugins/summary_test.py
new file mode 100644
index 0000000..edef6ca
--- /dev/null
+++ b/plaso/parsers/olecf_plugins/summary_test.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the OLE Compound File summary and document summary plugins.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import olecf as olecf_formatter +from plaso.lib import timelib_test +from plaso.parsers.olecf_plugins import summary +from plaso.parsers.olecf_plugins import test_lib + + +class TestSummaryInfoOlecfPlugin(test_lib.OleCfPluginTestCase): + """Tests for the OLECF summary information plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._summary_plugin = summary.SummaryInfoOlecfPlugin() + self._test_file = self._GetTestFilePath(['Document.doc']) + + def testProcess(self): + """Tests the Process function on a SummaryInformation stream.""" + event_queue_consumer = self._ParseOleCfFileWithPlugin( + self._test_file, self._summary_plugin) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # There is one summary info stream with three event objects. + self.assertEquals(len(event_objects), 3) + + event_object = event_objects[0] + self.assertEquals(event_object.name, u'Summary Information') + + self.assertEquals(event_object.title, u'Table of Context') + self.assertEquals(event_object.author, u'DAVID NIDES') + self.assertEquals(event_object.template, u'Normal.dotm') + self.assertEquals(event_object.last_saved_by, u'Nides') + self.assertEquals(event_object.revision_number, u'4') + self.assertEquals(event_object.number_of_characters, 18) + self.assertEquals(event_object.application, u'Microsoft Office Word') + self.assertEquals(event_object.security, 0) + + self.assertEquals(event_object.timestamp_desc, u'Document Creation Time') + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-12-10 18:38:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Title: Table of Context ' + u'Author: DAVID NIDES ' + u'Template: Normal.dotm ' + u'Revision number: 4 ' + u'Last saved by: Nides ' + u'Number of pages: 1 ' + u'Number of words: 3 ' + u'Number of characters: 18 ' + u'Application: Microsoft Office Word ' + u'Security: 0') + + expected_msg_short = ( + u'Title: Table of Context ' + u'Author: DAVID NIDES ' + u'Revision number: 4') + + # TODO: add support for: + # u'Total edit time (secs): 0 ' + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class TestDocumentSummaryInfoOlecfPlugin(test_lib.OleCfPluginTestCase): + """Tests for the OLECF document summary information plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._document_summary_plugin = summary.DocumentSummaryOlecfPlugin() + self._test_file = self._GetTestFilePath(['Document.doc']) + + def testProcess(self): + """Tests the Process function on a DocumentSummaryInformation stream.""" + event_queue_consumer = self._ParseOleCfFileWithPlugin( + self._test_file, self._document_summary_plugin) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # There should only be one summary info stream with one event. 
+    self.assertEquals(len(event_objects), 1)
+
+    event_object = event_objects[0]
+    self.assertEquals(event_object.name, u'Document Summary Information')
+
+    self.assertEquals(event_object.number_of_lines, 1)
+    self.assertEquals(event_object.number_of_paragraphs, 1)
+    self.assertEquals(event_object.company, u'KPMG')
+    self.assertFalse(event_object.shared_document)
+    self.assertEquals(event_object.application_version, u'14.0')
+
+    # TODO: add support for:
+    # self.assertEquals(event_object.is_shared, False)
+
+    expected_msg = (
+        u'Number of lines: 1 '
+        u'Number of paragraphs: 1 '
+        u'Company: KPMG '
+        u'Shared document: False '
+        u'Application version: 14.0')
+
+    expected_msg_short = (
+        u'Company: KPMG')
+
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/olecf_plugins/test_lib.py b/plaso/parsers/olecf_plugins/test_lib.py
new file mode 100644
index 0000000..6ae019b
--- /dev/null
+++ b/plaso/parsers/olecf_plugins/test_lib.py
@@ -0,0 +1,84 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""OLECF plugin related functions and classes for testing."""
+
+from dfvfs.lib import definitions
+from dfvfs.path import factory as path_spec_factory
+from dfvfs.resolver import resolver as path_spec_resolver
+
+import pyolecf
+
+from plaso.engine import single_process
+from plaso.parsers import test_lib
+
+
+class OleCfPluginTestCase(test_lib.ParserTestCase):
+  """The unit test case for OLE CF based plugins."""
+
+  def _OpenOleCfFile(self, path, codepage='cp1252'):
+    """Opens an OLE compound file and returns a pyolecf.file object.
+
+    Args:
+      path: The path to the OLE CF test file.
+      codepage: Optional codepage. The default is cp1252.
+    """
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_OS, location=path)
+    file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+    file_object = file_entry.GetFileObject()
+    olecf_file = pyolecf.file()
+    olecf_file.set_ascii_codepage(codepage)
+
+    olecf_file.open_file_object(file_object)
+
+    return olecf_file
+
+  def _ParseOleCfFileWithPlugin(
+      self, path, plugin_object, knowledge_base_values=None):
+    """Parses a file as an OLE compound file and returns an event generator.
+
+    Args:
+      path: The path to the OLE CF test file.
+      plugin_object: The plugin object that is used to extract an event
+                     generator.
+      knowledge_base_values: Optional dict containing the knowledge base
+                             values. The default is None.
+
+    Returns:
+      An event object queue consumer object (instance of
+      TestEventObjectQueueConsumer).
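+
+    A (hypothetical) plugin test would use it along these lines:
+
+      event_queue_consumer = self._ParseOleCfFileWithPlugin(
+          test_file, self._plugin)
+      event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)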
+ """ + event_queue = single_process.SingleProcessQueue() + event_queue_consumer = test_lib.TestEventObjectQueueConsumer(event_queue) + + parse_error_queue = single_process.SingleProcessQueue() + + parser_context = self._GetParserContext( + event_queue, parse_error_queue, + knowledge_base_values=knowledge_base_values) + olecf_file = self._OpenOleCfFile(path) + + # Get a list of all root items from the OLE CF file. + root_item = olecf_file.root_item + item_names = [item.name for item in root_item.sub_items] + + plugin_object.Process( + parser_context, root_item=root_item, item_names=item_names) + + return event_queue_consumer diff --git a/plaso/parsers/opera.py b/plaso/parsers/opera.py new file mode 100644 index 0000000..92944f5 --- /dev/null +++ b/plaso/parsers/opera.py @@ -0,0 +1,326 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parsers for Opera Browser history files.""" + +import logging +import os +import urllib2 + +from dfvfs.helpers import text_file +from xml.etree import ElementTree + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.lib import utils +from plaso.parsers import interface +from plaso.parsers import manager + + +class OperaTypedHistoryEvent(event.EventObject): + """An EventObject for an Opera typed history entry.""" + + DATA_TYPE = 'opera:history:typed_entry' + + def __init__(self, last_typed_time, url, entry_type): + """A constructor for the typed history event. + + Args: + last_typed_time: A ISO 8601 string denoting the last time + the URL was typed into a browser. + url: The url, or the typed hostname. + entry_type: A string indicating whether the URL was directly + typed in or the result of the user choosing from the + auto complete (based on prior history). + """ + super(OperaTypedHistoryEvent, self).__init__() + self.url = url + self.entry_type = entry_type + + if entry_type == 'selected': + self.entry_selection = 'Filled from autocomplete.' + elif entry_type == 'text': + self.entry_selection = 'Manually typed.' 
+
+    self.timestamp = timelib.Timestamp.FromTimeString(last_typed_time)
+    self.timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME
+
+
+class OperaGlobalHistoryEvent(time_events.PosixTimeEvent):
+  """An EventObject for an Opera global history entry."""
+
+  DATA_TYPE = 'opera:history:entry'
+
+  def __init__(self, timestamp, url, title, popularity_index):
+    """Initialize the event object."""
+    super(OperaGlobalHistoryEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.PAGE_VISITED, self.DATA_TYPE)
+
+    self.url = url
+    if title != url:
+      self.title = title
+
+    self.popularity_index = popularity_index
+
+    if popularity_index < 0:
+      self.description = 'First and Only Visit'
+    else:
+      self.description = 'Last Visit'
+
+
+class OperaTypedHistoryParser(interface.BaseParser):
+  """Parses the Opera typed_history.xml file."""
+
+  NAME = 'opera_typed_history'
+  DESCRIPTION = u'Parser for Opera typed_history.xml files.'
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract data from an Opera typed history file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    file_object = file_entry.GetFileObject()
+    file_object.seek(0, os.SEEK_SET)
+
+    text_file_object = text_file.TextFile(file_object)
+
+    # Need to verify the first line to make sure this is a) XML and
+    # b) the right XML.
+    first_line = text_file_object.readline(90)
+
+    if not first_line.startswith('<?xml version="1.0"'):
+      file_object.close()
+      raise errors.UnableToParseFile(
+          u'Not an Opera typed history file [wrong XML root key]')
+
+    # For ElementTree to work we need to work on a file object seeked
+    # to the beginning.
+    file_object.seek(0, os.SEEK_SET)
+
+    xml = ElementTree.parse(file_object)
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    for history_item in xml.iterfind('typed_history_item'):
+      content = history_item.get('content', '')
+      last_typed = history_item.get('last_typed', '')
+      entry_type = history_item.get('type', '')
+
+      event_object = OperaTypedHistoryEvent(last_typed, content, entry_type)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    file_object.close()
+
+
+class OperaGlobalHistoryParser(interface.BaseParser):
+  """Parses the Opera global_history.dat file."""
+
+  NAME = 'opera_global'
+  DESCRIPTION = u'Parser for Opera global_history.dat files.'
+
+  _SUPPORTED_URL_SCHEMES = frozenset(['file', 'http', 'https', 'ftp'])
+
+  def _IsValidUrl(self, url):
+    """A simple test to see if a URL is considered valid."""
+    parsed_url = urllib2.urlparse.urlparse(url)
+
+    # Only URLs with a supported scheme are considered valid.
+    if parsed_url.scheme in self._SUPPORTED_URL_SCHEMES:
+      return True
+
+    return False
+
+  def _ReadRecord(self, text_file_object, max_line_length=0):
+    """Return a single record from an Opera global_history file.
+
+    A single record consists of four lines, with each line as:
+      Title of page (or the URL if not there).
+      Website URL.
+      Timestamp in POSIX time.
+      Popularity index (-1 if first time visited).
+
+    Args:
+      text_file_object: A text file object (instance of dfvfs.TextFile).
+      max_line_length: An integer that denotes the maximum byte
+                       length for each line read.
+
+    Returns:
+      A tuple of: title, url, timestamp, popularity_index.
+ + Raises: + errors.NotAText: If the file being read is not a text file. + """ + if max_line_length: + title_raw = text_file_object.readline(max_line_length) + if len(title_raw) == max_line_length and not title_raw.endswith('\n'): + return None, None, None, None + if not utils.IsText(title_raw): + raise errors.NotAText(u'Title line is not a text.') + title = title_raw.strip() + else: + title = text_file_object.readline().strip() + + if not title: + return None, None, None, None + + url = text_file_object.readline().strip() + + if not url: + return None, None, None, None + + timestamp_line = text_file_object.readline().strip() + popularity_line = text_file_object.readline().strip() + + try: + timestamp = int(timestamp_line, 10) + except ValueError: + if len(timestamp_line) > 30: + timestamp_line = timestamp_line[0:30] + logging.debug(u'Unable to read in timestamp [{!r}]'.format( + timestamp_line)) + return None, None, None, None + + try: + popularity_index = int(popularity_line, 10) + except ValueError: + try: + logging.debug(u'Unable to read in popularity index[{}]'.format( + popularity_line)) + except UnicodeDecodeError: + logging.debug( + u'Unable to read in popularity index [unable to print ' + u'bad line]') + return None, None, None, None + + # Try to get the data into unicode. + try: + title_unicode = title.decode('utf-8') + except UnicodeDecodeError: + partial_title = title.decode('utf-8', 'ignore') + title_unicode = u'Warning: partial line, starts with: {}'.format( + partial_title) + + return title_unicode, url, timestamp, popularity_index + + def _ReadRecords(self, text_file_object): + """Yield records read from an Opera global_history file. + + A single record consists of four lines, with each line as: + Title of page (or the URL if not there). + Website URL. + Timestamp in POSIX time. + Popularity index (-1 if first time visited). + + Args: + text_file_object: A text file object (instance of dfvfs.TextFile). + + Yields: + A tuple of: title, url, timestamp, popularity_index. + """ + while True: + title, url, timestamp, popularity_index = self._ReadRecord( + text_file_object) + + if not title: + raise StopIteration + if not url: + raise StopIteration + if not popularity_index: + raise StopIteration + + yield title, url, timestamp, popularity_index + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from an Opera global history file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + file_object.seek(0, os.SEEK_SET) + + text_file_object = text_file.TextFile(file_object) + + try: + title, url, timestamp, popularity_index = self._ReadRecord( + text_file_object, 400) + except errors.NotAText: + file_object.close() + raise errors.UnableToParseFile( + u'Not an Opera history file [not a text file].') + + if not title: + file_object.close() + raise errors.UnableToParseFile( + u'Not an Opera history file [no title present].') + + if not self._IsValidUrl(url): + file_object.close() + raise errors.UnableToParseFile( + u'Not an Opera history file [not a valid URL].') + + if not timestamp: + file_object.close() + raise errors.UnableToParseFile( + u'Not an Opera history file [timestamp does not exist].') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. 
+ parser_chain = self._BuildParserChain(parser_chain) + + event_object = OperaGlobalHistoryEvent( + timestamp, url, title, popularity_index) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + # Read in the rest of the history file. + for title, url, timestamp, popularity_index in self._ReadRecords( + text_file_object): + event_object = OperaGlobalHistoryEvent( + timestamp, url, title, popularity_index) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + file_object.close() + + +manager.ParsersManager.RegisterParsers([ + OperaTypedHistoryParser, OperaGlobalHistoryParser]) diff --git a/plaso/parsers/opera_test.py b/plaso/parsers/opera_test.py new file mode 100644 index 0000000..9b130a3 --- /dev/null +++ b/plaso/parsers/opera_test.py @@ -0,0 +1,118 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Opera browser history parsers.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import opera as opera_formatter +from plaso.lib import timelib_test +from plaso.parsers import opera +from plaso.parsers import test_lib + + +class OperaTypedParserTest(test_lib.ParserTestCase): + """Tests for the Opera Typed History parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = opera.OperaTypedHistoryParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['typed_history.xml']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 4) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-11 23:45:27') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.entry_selection, 'Filled from autocomplete.') + + expected_string = u'plaso.kiddaland.net (Filled from autocomplete.)' + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + event_object = event_objects[3] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-11 22:46:07') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.entry_selection, 'Manually typed.') + + expected_string = u'theonion.com (Manually typed.)' + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +class OperaGlobalParserTest(test_lib.ParserTestCase): + """Tests for the Opera Global History parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = opera.OperaGlobalHistoryParser() + + def testParseFile(self): + """Read a history file and run a few tests.""" + test_file = 
self._GetTestFilePath(['global_history.dat']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 37) + + event_object = event_objects[4] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-11 22:45:46') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'http://www.mbl.is/frettir/erlent/2013/11/11/' + u'karl_bretaprins_faer_ellilifeyri/ (Karl Bretaprins fær ellilífeyri' + u' - mbl.is) [First and Only Visit]') + expected_msg_short = ( + u'http://www.mbl.is/frettir/erlent/2013/11/11/' + u'karl_bretaprins_faer_ellilifeyri/...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[10] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-11 22:45:55') + self.assertEquals(event_object.timestamp, expected_timestamp) + + event_object = event_objects[16] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-11 22:46:16') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_title = ( + u'10 Celebrities You Never Knew Were Abducted And Murdered ' + u'By Andie MacDowell | The Onion - America\'s Finest News Source') + + self.assertEquals(event_object.title, expected_title) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/oxml.py b/plaso/parsers/oxml.py new file mode 100644 index 0000000..90e48fb --- /dev/null +++ b/plaso/parsers/oxml.py @@ -0,0 +1,183 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for OXML files (i.e. MS Office 2007+).""" + +import logging +import re +import struct +import zipfile + +from xml.etree import ElementTree + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class OpenXMLParserEvent(time_events.TimestampEvent): + """Process timestamps from MS Office XML Events.""" + + DATA_TYPE = 'metadata:openxml' + + def __init__(self, timestamp_string, usage, metadata): + """Initializes the event object. + + Args: + timestamp_string: An ISO 8601 representation of a timestamp. + usage: The description of the usage of the time value. + metadata: A dict object containing extracted metadata. + """ + timestamp = timelib.Timestamp.FromTimeString(timestamp_string) + super(OpenXMLParserEvent, self).__init__(timestamp, usage, self.DATA_TYPE) + for key, value in metadata.iteritems(): + setattr(self, key, value) + + +class OpenXMLParser(interface.BaseParser): + """Parse metadata from OXML files.""" + + NAME = 'openxml' + DESCRIPTION = u'Parser for OpenXML (OXML) files.' 
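+
+  # An OXML document is a ZIP container and the metadata parsed here lives
+  # in the docProps part, for example docProps/core.xml (illustrative
+  # excerpt, namespace declarations omitted):
+  #
+  #   <dc:creator>Nides</dc:creator>
+  #   <cp:lastModifiedBy>Nides</cp:lastModifiedBy>
+  #   <dcterms:created xsi:type="dcterms:W3CDTF">
+  #       2012-11-07T23:29:00Z</dcterms:created>
+  #
+  # Tag names, with their namespaces stripped, are translated to event
+  # attribute names via _METAKEY_TRANSLATE below or, failing that,
+  # _FixString().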
+ + _METAKEY_TRANSLATE = { + 'creator': 'author', + 'lastModifiedBy': 'last_saved_by', + 'Total_Time': 'total_edit_time', + 'Pages': 'num_pages', + 'Characters_with_spaces': 'num_chars_w_spaces', + 'Paragraphs': 'num_paragraphs', + 'Characters': 'num_chars', + 'Lines': 'num_lines', + 'revision': 'revision_num', + 'Words': 'num_words', + 'Application': 'creating_app', + 'Shared_Doc': 'shared', + } + + _FILES_REQUIRED = frozenset([ + '[Content_Types].xml', '_rels/.rels', 'docProps/core.xml']) + + def _FixString(self, key): + """Convert CamelCase to lower_with_underscore.""" + # TODO: Add unicode support. + fix_key = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', key) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', fix_key).lower() + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from an OXML file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + + if not zipfile.is_zipfile(file_object): + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, 'Not a Zip file.')) + + try: + zip_container = zipfile.ZipFile(file_object, 'r') + except (zipfile.BadZipfile, struct.error, zipfile.LargeZipFile): + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, 'Bad Zip file.')) + + zip_name_list = set(zip_container.namelist()) + + if not self._FILES_REQUIRED.issubset(zip_name_list): + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, 'OXML element(s) missing.')) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + metadata = {} + timestamps = {} + + try: + rels_xml = zip_container.read('_rels/.rels') + except zipfile.BadZipfile as exception: + logging.error( + u'Unable to parse file {0:s} with error: {1:s}'.format( + file_entry.name, exception)) + return + + rels_root = ElementTree.fromstring(rels_xml) + + for properties in rels_root.iter(): + if 'properties' in repr(properties.get('Type')): + try: + xml = zip_container.read(properties.get('Target')) + root = ElementTree.fromstring(xml) + except ( + OverflowError, IndexError, KeyError, ValueError, + zipfile.BadZipfile) as exception: + logging.warning( + u'[{0:s}] unable to read property with error: {1:s}.'.format( + self.NAME, exception)) + continue + + for element in root.iter(): + if element.text: + _, _, tag = element.tag.partition('}') + # Not including the 'lpstr' attribute because it is + # very verbose. 
+ if tag == 'lpstr': + continue + + if tag in ('created', 'modified', 'lastPrinted'): + timestamps[tag] = element.text + else: + tag_name = self._METAKEY_TRANSLATE.get(tag, self._FixString(tag)) + metadata[tag_name] = element.text + + if timestamps.get('created', None): + event_object = OpenXMLParserEvent( + timestamps.get('created'), eventdata.EventTimestamp.CREATION_TIME, + metadata) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if timestamps.get('modified', None): + event_object = OpenXMLParserEvent( + timestamps.get('modified'), + eventdata.EventTimestamp.MODIFICATION_TIME, metadata) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if timestamps.get('lastPrinted', None): + event_object = OpenXMLParserEvent( + timestamps.get('lastPrinted'), eventdata.EventTimestamp.LAST_PRINTED, + metadata) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +manager.ParsersManager.RegisterParser(OpenXMLParser) diff --git a/plaso/parsers/oxml_test.py b/plaso/parsers/oxml_test.py new file mode 100644 index 0000000..d8dc1a3 --- /dev/null +++ b/plaso/parsers/oxml_test.py @@ -0,0 +1,96 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the OXML parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import oxml as oxml_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import oxml +from plaso.parsers import test_lib + + +class OXMLTest(test_lib.ParserTestCase): + """Tests for the OXML parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = oxml.OpenXMLParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['Document.docx']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-11-07 23:29:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + event_object = event_objects[1] + + self.assertEquals(event_object.num_chars, u'13') + self.assertEquals(event_object.total_time, u'1385') + self.assertEquals(event_object.characters_with_spaces, u'14') + self.assertEquals(event_object.i4, u'1') + self.assertEquals(event_object.app_version, u'14.0000') + self.assertEquals(event_object.num_lines, u'1') + self.assertEquals(event_object.scale_crop, u'false') + self.assertEquals(event_object.num_pages, u'1') + self.assertEquals(event_object.num_words, u'2') + self.assertEquals(event_object.links_up_to_date, u'false') + self.assertEquals(event_object.num_paragraphs, u'1') + self.assertEquals(event_object.doc_security, u'0') + self.assertEquals(event_object.hyperlinks_changed, u'false') + self.assertEquals(event_object.revision_num, u'3') + self.assertEquals(event_object.last_saved_by, u'Nides') + self.assertEquals(event_object.author, u'Nides') + self.assertEquals( + event_object.creating_app, u'Microsoft Office Word') + self.assertEquals(event_object.template, u'Normal.dotm') + + expected_msg = ( + u'Creating App: Microsoft Office Word ' + u'App version: 14.0000 ' + u'Last saved by: Nides ' + u'Author: Nides ' + u'Revision Num: 3 ' + u'Template: Normal.dotm ' + u'Num pages: 1 ' + u'Num words: 2 ' + u'Num chars: 13 ' + u'Num lines: 1 ' + u'Hyperlinks changed: false ' + u'Links up to date: false ' + u'Scale crop: false') + expected_msg_short = ( + u'Author: Nides') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/pcap.py b/plaso/parsers/pcap.py new file mode 100644 index 0000000..47df468 --- /dev/null +++ b/plaso/parsers/pcap.py @@ -0,0 +1,843 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Parser for PCAP files.""" + +import binascii +import operator +import socket + +import dpkt + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Dominique Kilman (lexistar97@gmail.com)' + + +def ParseDNS(dns_packet_data): + """Parse DNS packets and return a string with relevant details. + + Args: + dns_packet_data: DNS packet data. + + Returns: + Formatted DNS details. + """ + dns_data = [] + + try: + dns = dpkt.dns.DNS(dns_packet_data) + if dns.rcode is dpkt.dns.DNS_RCODE_NOERR: + if dns.get_qr() == 1: + if not dns.an: + dns_data.append('DNS Response: No answer for ') + dns_data.append(dns.qd[0].name) + else: + # Type of DNS answer. + for answer in dns.an: + if answer.type == 5: + dns_data.append('DNS-CNAME request ') + dns_data.append(answer.name) + dns_data.append(' response: ') + dns_data.append(answer.cname) + elif answer.type == 1: + dns_data.append('DNS-A request ') + dns_data.append(answer.name) + dns_data.append(' response: ') + dns_data.append(socket.inet_ntoa(answer.rdata)) + elif answer.type == 12: + dns_data.append('DNS-PTR request ') + dns_data.append(answer.name) + dns_data.append(' response: ') + dns_data.append(answer.ptrname) + elif not dns.get_qr(): + dns_data.append('DNS Query for ') + dns_data.append(dns.qd[0].name) + else: + dns_data.append('DNS error code ') + dns_data.append(str(dns.rcode)) + + except dpkt.UnpackError as exception: + dns_data.append('DNS Unpack Error: {0:s}. First 20 of data {1:s}'.format( + exception, repr(dns_packet_data[:20]))) + except IndexError as exception: + dns_data.append('DNS Index Error: {0:s}'.format(exception)) + + return u' '.join(dns_data) + + +def ParseNetBios(netbios_packet): + """Parse the netBIOS stream details. + + Args: + netbios_packet: NetBIOS packet. + + Returns: + Formatted netBIOS details. + """ + netbios_data = [] + for query in netbios_packet.qd: + netbios_data.append('NETBIOS qd:') + netbios_data.append(repr(dpkt.netbios.decode_name(query.name))) + for answer in netbios_packet.an: + netbios_data.append('NETBIOS an:') + netbios_data.append(repr(dpkt.netbios.decode_name(answer.name))) + for name in netbios_packet.ns: + netbios_data.append('NETBIOS ns:') + netbios_data.append(repr(dpkt.netbios.decode_name(name.name))) + + return u' '.join(netbios_data) + + +def TCPFlags(flag): + """Check the tcp flags for a packet for future use. + + Args: + flag: Flag value from TCP packet. + + Returns: + String with printable flags for specific packet. + """ + res = [] + if flag & dpkt.tcp.TH_FIN: + res.append('FIN') + if flag & dpkt.tcp.TH_SYN: + res.append('SYN') + if flag & dpkt.tcp.TH_RST: + res.append('RST') + if flag & dpkt.tcp.TH_PUSH: + res.append('PUSH') + if flag & dpkt.tcp.TH_ACK: + res.append('ACK') + if flag & dpkt.tcp.TH_URG: + res.append('URG') + if flag & dpkt.tcp.TH_ECE: + res.append('ECN') + if flag & dpkt.tcp.TH_CWR: + res.append('CWR') + + return '|'.join(res) + + +def ICMPTypes(packet): + """Parse the type information for the icmp packets. + + Args: + packet: ICMP packet data. + + Returns: + Formatted ICMP details. + """ + icmp_type = packet.type + icmp_code = packet.code + icmp_data = [] + icmp_data.append('ICMP') + + # TODO: Make the below code more readable. + # Possible to use lookup dict? Or method + # calls? 
+ if icmp_type is dpkt.icmp.ICMP_CODE_NONE: + icmp_data.append('ICMP without codes') + elif icmp_type is dpkt.icmp.ICMP_ECHOREPLY: + icmp_data.append('echo reply') + elif icmp_type is dpkt.icmp.ICMP_UNREACH: + icmp_data.append('ICMP dest unreachable') + if icmp_code is dpkt.icmp.ICMP_UNREACH_NET: + icmp_data.append(': bad net') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_HOST: + icmp_data.append(': host unreachable') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_PROTO: + icmp_data.append(': bad protocol') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_PORT: + icmp_data.append(': port unreachable') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_NEEDFRAG: + icmp_data.append(': IP_DF caused drop') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_SRCFAIL: + icmp_data.append(': src route failed') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_NET_UNKNOWN: + icmp_data.append(': unknown net') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_HOST_UNKNOWN: + icmp_data.append(': unknown host') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_ISOLATED: + icmp_data.append(': src host isolated') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_NET_PROHIB: + icmp_data.append(': for crypto devs') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_HOST_PROHIB: + icmp_data.append(': for cypto devs') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_TOSNET: + icmp_data.append(': bad tos for net') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_TOSHOST: + icmp_data.append(': bad tos for host') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_FILTER_PROHIB: + icmp_data.append(': prohibited access') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_HOST_PRECEDENCE: + icmp_data.append(': precedence error') + elif icmp_code is dpkt.icmp.ICMP_UNREACH_PRECEDENCE_CUTOFF: + icmp_data.append(': precedence cutoff') + elif icmp_type is dpkt.icmp.ICMP_SRCQUENCH: + icmp_data.append('ICMP source quench') + elif icmp_type is dpkt.icmp.ICMP_REDIRECT: + icmp_data.append('ICMP Redirect') + if icmp_code is dpkt.icmp.ICMP_REDIRECT_NET: + icmp_data.append(' for network') + elif icmp_code is dpkt.icmp.ICMP_REDIRECT_HOST: + icmp_data.append(' for host') + elif icmp_code is dpkt.icmp.ICMP_REDIRECT_TOSNET: + icmp_data.append(' for tos and net') + elif icmp_code is dpkt.icmp.ICMP_REDIRECT_TOSHOST: + icmp_data.append(' for tos and host') + elif icmp_type is dpkt.icmp.ICMP_ALTHOSTADDR: + icmp_data.append('ICMP alternate host address') + elif icmp_type is dpkt.icmp.ICMP_ECHO: + icmp_data.append('ICMP echo') + elif icmp_type is dpkt.icmp.ICMP_RTRADVERT: + icmp_data.append('ICMP Route advertisement') + if icmp_code is dpkt.icmp.ICMP_RTRADVERT_NORMAL: + icmp_data.append(': normal') + elif icmp_code is dpkt.icmp.ICMP_RTRADVERT_NOROUTE_COMMON: + icmp_data.append(': selective routing') + elif icmp_type is dpkt.icmp.ICMP_RTRSOLICIT: + icmp_data.append('ICMP Router solicitation') + elif icmp_type is dpkt.icmp.ICMP_TIMEXCEED: + icmp_data.append('ICMP time exceeded, code:') + if icmp_code is dpkt.icmp.ICMP_TIMEXCEED_INTRANS: + icmp_data.append(' ttl==0 in transit') + elif icmp_code is dpkt.icmp.ICMP_TIMEXCEED_REASS: + icmp_data.append('ttl==0 in reass') + elif icmp_type is dpkt.icmp.ICMP_PARAMPROB: + icmp_data.append('ICMP ip header bad') + if icmp_code is dpkt.icmp.ICMP_PARAMPROB_ERRATPTR: + icmp_data.append(':req. opt. absent') + elif icmp_code is dpkt.icmp.ICMP_PARAMPROB_OPTABSENT: + icmp_data.append(': req. opt. 
+ elif icmp_code is dpkt.icmp.ICMP_PARAMPROB_LENGTH:
+ icmp_data.append(': length')
+ elif icmp_type is dpkt.icmp.ICMP_TSTAMP:
+ icmp_data.append('ICMP timestamp request')
+ elif icmp_type is dpkt.icmp.ICMP_TSTAMPREPLY:
+ icmp_data.append('ICMP timestamp reply')
+ elif icmp_type is dpkt.icmp.ICMP_INFO:
+ icmp_data.append('ICMP information request')
+ elif icmp_type is dpkt.icmp.ICMP_INFOREPLY:
+ icmp_data.append('ICMP information reply')
+ elif icmp_type is dpkt.icmp.ICMP_MASK:
+ icmp_data.append('ICMP address mask request')
+ elif icmp_type is dpkt.icmp.ICMP_MASKREPLY:
+ icmp_data.append('ICMP address mask reply')
+ elif icmp_type is dpkt.icmp.ICMP_TRACEROUTE:
+ icmp_data.append('ICMP traceroute')
+ elif icmp_type is dpkt.icmp.ICMP_DATACONVERR:
+ icmp_data.append('ICMP data conversion error')
+ elif icmp_type is dpkt.icmp.ICMP_MOBILE_REDIRECT:
+ icmp_data.append('ICMP mobile host redirect')
+ elif icmp_type is dpkt.icmp.ICMP_IP6_WHEREAREYOU:
+ icmp_data.append('ICMP IPv6 where-are-you')
+ elif icmp_type is dpkt.icmp.ICMP_IP6_IAMHERE:
+ icmp_data.append('ICMP IPv6 i-am-here')
+ elif icmp_type is dpkt.icmp.ICMP_MOBILE_REG:
+ icmp_data.append('ICMP mobile registration req')
+ elif icmp_type is dpkt.icmp.ICMP_MOBILE_REGREPLY:
+ icmp_data.append('ICMP mobile registration reply')
+ elif icmp_type is dpkt.icmp.ICMP_DNS:
+ icmp_data.append('ICMP domain name request')
+ elif icmp_type is dpkt.icmp.ICMP_DNSREPLY:
+ icmp_data.append('ICMP domain name reply')
+ elif icmp_type is dpkt.icmp.ICMP_PHOTURIS:
+ icmp_data.append('ICMP Photuris')
+ if icmp_code is dpkt.icmp.ICMP_PHOTURIS_UNKNOWN_INDEX:
+ icmp_data.append(': unknown sec index')
+ elif icmp_code is dpkt.icmp.ICMP_PHOTURIS_AUTH_FAILED:
+ icmp_data.append(': auth failed')
+ elif icmp_code is dpkt.icmp.ICMP_PHOTURIS_DECOMPRESS_FAILED:
+ icmp_data.append(': decompress failed')
+ elif icmp_code is dpkt.icmp.ICMP_PHOTURIS_DECRYPT_FAILED:
+ icmp_data.append(': decrypt failed')
+ elif icmp_code is dpkt.icmp.ICMP_PHOTURIS_NEED_AUTHN:
+ icmp_data.append(': no authentication')
+ elif icmp_code is dpkt.icmp.ICMP_PHOTURIS_NEED_AUTHZ:
+ icmp_data.append(': no authorization')
+ elif icmp_type is dpkt.icmp.ICMP_TYPE_MAX:
+ icmp_data.append('ICMP Type Max')
+
+ return u' '.join(icmp_data)
+
+
+class Stream(object):
+ """Stores packet details on network streams parsed from a PCAP file."""
+
+ def __init__(self, packet, prot_data, source_ip, dest_ip, prot):
+ """Initialize new stream.
+
+ Args:
+ packet: Packet data.
+ prot_data: Protocol level data for ARP, UDP, TCP, ICMP. For other
+ types of ether packets, this is just the ether.data.
+ source_ip: Source IP.
+ dest_ip: Destination IP.
+ prot: Protocol (TCP, UDP, ICMP, ARP).
+ """
+ self.packet_id = [packet[1]]
+ self.timestamps = [packet[0]]
+ self.size = packet[3]
+ self.start_time = packet[0]
+ self.all_data = [prot_data]
+ self.protocol_data = ''
+ self.stream_data = []
+
+ if prot == 'TCP' or prot == 'UDP':
+ self.source_port = prot_data.sport
+ self.dest_port = prot_data.dport
+ else:
+ self.source_port = ''
+ self.dest_port = ''
+
+ self.source_ip = source_ip
+ self.dest_ip = dest_ip
+ self.protocol = prot
+
+ def AddPacket(self, packet, prot_data):
+ """Add another packet to an existing stream.
+
+ Args:
+ packet: Packet data.
+ prot_data: Protocol level data for ARP, UDP, TCP, ICMP. For other
+ types of ether packets, this is just the ether.data.
+ """
+ self.packet_id.append(packet[1])
+ self.timestamps.append(packet[0])
+ self.all_data.append(prot_data)
+ self.size += packet[3]
+
+ def SpecialTypes(self):
+ """Checks for some special types of packets.
+
+ This method checks for some special packets and assembles usable data;
+ it currently works for: DNS (UDP 53), HTTP, NetBIOS (UDP 137) and ICMP.
+
+ Returns:
+ A tuple consisting of a basic description of the stream
+ (e.g. HTTP Request) and the prettified string for the protocols.
+ """
+ packet_details = []
+ if self.stream_data[:4] == 'HTTP':
+ try:
+ http = dpkt.http.Response(self.stream_data)
+ packet_details.append('HTTP Response: status: ')
+ packet_details.append(http.status)
+ packet_details.append(' reason: ')
+ packet_details.append(http.reason)
+ packet_details.append(' version: ')
+ packet_details.append(http.version)
+ return 'HTTP Response', u' '.join(packet_details)
+
+ except dpkt.UnpackError as exception:
+ packet_details = (
+ u'HTTP Response Unpack Error: {0:s}. '
+ u'First 20 of data {1:s}').format(
+ exception, repr(self.stream_data[:20]))
+ return 'HTTP Response', packet_details
+
+ except IndexError as exception:
+ packet_details = (
+ u'HTTP Response Index Error: {0:s}. First 20 of data {1:s}').format(
+ exception, repr(self.stream_data[:20]))
+ return 'HTTP Response', packet_details
+
+ except ValueError as exception:
+ packet_details = (
+ u'HTTP Response parsing error: {0:s}. '
+ u'First 20 of data {1:s}').format(
+ exception, repr(self.stream_data[:20]))
+ return 'HTTP Response', packet_details
+
+ elif self.stream_data[:3] == 'GET' or self.stream_data[:4] == 'POST':
+ try:
+ http = dpkt.http.Request(self.stream_data)
+ packet_details.append('HTTP Request: method: ')
+ packet_details.append(http.method)
+ packet_details.append(' uri: ')
+ packet_details.append(http.uri)
+ packet_details.append(' version: ')
+ packet_details.append(http.version)
+ packet_details.append(' headers: ')
+ packet_details.append(repr(http.headers))
+ return 'HTTP Request', u' '.join(packet_details)
+
+ except dpkt.UnpackError as exception:
+ packet_details = (
+ u'HTTP Request unpack error: {0:s}. First 20 of data {1:s}').format(
+ exception, repr(self.stream_data[:20]))
+ return 'HTTP Request', packet_details
+
+ except ValueError as exception:
+ packet_details = (
+ u'HTTP Request parsing error: {0:s}. '
+ u'First 20 of data {1:s}').format(
+ exception, repr(self.stream_data[:20]))
+ return 'HTTP Request', packet_details
+
+ elif self.protocol == 'UDP' and (
+ self.source_port == 53 or self.dest_port == 53):
+ # DNS request/replies.
+ # Check to see if the lengths are valid.
+ for packet in self.all_data:
+ if packet.ulen != len(packet):
+ packet_details.append('Truncated DNS packets - unable to parse: ')
+ packet_details.append(repr(self.stream_data[15:40]))
+ return 'DNS', u' '.join(packet_details)
+
+ return 'DNS', ParseDNS(self.stream_data)
+
+ elif self.protocol == 'UDP' and (
+ self.source_port == 137 or self.dest_port == 137):
+ return 'NetBIOS', ParseNetBios(dpkt.netbios.NS(self.stream_data))
+
+ elif self.protocol == 'ICMP':
+ # ICMP packets all end up as 1 stream, so they need to be
+ # processed 1 by 1.
+ return 'ICMP', ICMPTypes(self.all_data[0])
+
+ elif '\x03\x01' in self.stream_data[1:3]:
+ # Some form of SSL/TLS data (record version 3.1, i.e. TLS 1.0).
+ try:
+ ssl = dpkt.ssl.SSL2(self.stream_data)
+ packet_details.append('SSL data. 
Length: ') + packet_details.append(str(ssl.len)) + return 'SSL', u' '.join(packet_details) + except dpkt.UnpackError as exception: + packet_details = ( + u'SSL unpack error: {0:s}. First 20 of data {1:s}').format( + exception, repr(self.stream_data[:20])) + return 'SSL', packet_details + + elif '\x03\x00' in self.stream_data[1:3]: + # Some form of ssl3 data. + try: + ssl = dpkt.ssl.SSL2(self.stream_data) + packet_details.append('SSL data. Length: ') + packet_details.append(str(ssl.len)) + return 'SSL', u' '.join(packet_details) + + except dpkt.UnpackError as exception: + packet_details = ( + u'SSL unpack error: {0:s}. First 20 of data {1:s}').format( + exception, repr(self.stream_data[:20])) + return 'SSL', packet_details + + return 'other', self.protocol_data + + def Clean(self): + """Clean up stream data.""" + clean_data = [] + for packet in self.all_data: + try: + clean_data.append(packet.data) + except AttributeError: + pass + + self.stream_data = ''.join(clean_data) + + +class PcapEvent(time_events.PosixTimeEvent): + """Convenience class for a PCAP record event.""" + + DATA_TYPE = 'metadata:pcap' + + def __init__(self, timestamp, usage, stream_object): + """Initializes the event. + + Args: + timestamp: The POSIX value of the timestamp. + usage: A usage description value. + stream_object: The stream object (instance of Stream). + """ + super(PcapEvent, self).__init__(timestamp, usage) + + self.source_ip = stream_object.source_ip + self.dest_ip = stream_object.dest_ip + self.source_port = stream_object.source_port + self.dest_port = stream_object.dest_port + self.protocol = stream_object.protocol + self.size = stream_object.size + self.stream_type, self.protocol_data = stream_object.SpecialTypes() + self.first_packet_id = min(stream_object.packet_id) + self.last_packet_id = max(stream_object.packet_id) + self.packet_count = len(stream_object.packet_id) + self.stream_data = repr(stream_object.stream_data[:50]) + + +class PcapParser(interface.BaseParser): + """Parses PCAP files.""" + + NAME = 'pcap' + DESCRIPTION = u'Parser for PCAP files.' + + def _ParseIPPacket( + self, connections, trunc_list, packet_number, timestamp, + packet_data_size, ip_packet): + """Parses an IP packet. + + Args: + connections: A dictionary object to track the IP connections. + trunc_list: A list of packets that truncated strangely and could + not be turned into a stream. + packet_number: The PCAP packet number, where 1 is the first packet. + timestamp: The PCAP packet timestamp. + packet_data_size: The packet data size. + ip_packet: The IP packet (instance of dpkt.ip.IP). + """ + packet_values = [timestamp, packet_number, ip_packet, packet_data_size] + + source_ip_address = socket.inet_ntoa(ip_packet.src) + destination_ip_address = socket.inet_ntoa(ip_packet.dst) + + if ip_packet.p == dpkt.ip.IP_PROTO_TCP: + # Later versions of dpkt seem to return a string instead of a TCP object. 
+ if isinstance(ip_packet.data, str): + try: + tcp = dpkt.tcp.TCP(ip_packet.data) + except (dpkt.NeedData, dpkt.UnpackError): + trunc_list.append(packet_values) + return + + else: + tcp = ip_packet.data + + stream_key = 'tcp: {0:s}:{1:d} > {2:s}:{3:d}'.format( + source_ip_address, tcp.sport, destination_ip_address, tcp.dport) + + if stream_key in connections: + connections[stream_key].AddPacket(packet_values, tcp) + else: + connections[stream_key] = Stream( + packet_values, tcp, source_ip_address, destination_ip_address, + 'TCP') + + elif ip_packet.p == dpkt.ip.IP_PROTO_UDP: + # Later versions of dpkt seem to return a string instead of an UDP object. + if isinstance(ip_packet.data, str): + try: + udp = dpkt.udp.UDP(ip_packet.data) + except (dpkt.NeedData, dpkt.UnpackError): + trunc_list.append(packet_values) + return + + else: + udp = ip_packet.data + + stream_key = 'udp: {0:s}:{1:d} > {2:s}:{3:d}'.format( + source_ip_address, udp.sport, destination_ip_address, udp.dport) + + if stream_key in connections: + connections[stream_key].AddPacket(packet_values, udp) + else: + connections[stream_key] = Stream( + packet_values, udp, source_ip_address, destination_ip_address, + 'UDP') + + elif ip_packet.p == dpkt.ip.IP_PROTO_ICMP: + # Later versions of dpkt seem to return a string instead of + # an ICMP object. + if isinstance(ip_packet.data, str): + icmp = dpkt.icmp.ICMP(ip_packet.data) + else: + icmp = ip_packet.data + + stream_key = 'icmp: {0:d} {1:s} > {2:s}'.format( + timestamp, source_ip_address, destination_ip_address) + + if stream_key in connections: + connections[stream_key].AddPacket(packet_values, icmp) + else: + connections[stream_key] = Stream( + packet_values, icmp, source_ip_address, destination_ip_address, + 'ICMP') + + def _ParseOtherPacket(self, packet_values): + """Parses a non-IP packet. + + Args: + packet_values: list of packet values + + Returns: + A stream object (instance of Stream) or None if the packet data + is not supported. 
+ """ + ether = packet_values[2] + stream_object = None + + if ether.type == dpkt.ethernet.ETH_TYPE_ARP: + arp = ether.data + arp_data = [] + stream_object = Stream( + packet_values, arp, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'ARP') + + if arp.op == dpkt.arp.ARP_OP_REQUEST: + arp_data.append('arp request: target IP = ') + arp_data.append(socket.inet_ntoa(arp.tpa)) + stream_object.protocol_data = u' '.join(arp_data) + + elif arp.op == dpkt.arp.ARP_OP_REPLY: + arp_data.append('arp reply: target IP = ') + arp_data.append(socket.inet_ntoa(arp.tpa)) + arp_data.append(' target MAC = ') + arp_data.append(binascii.hexlify(arp.tha)) + stream_object.protocol_data = u' '.join(arp_data) + + elif arp.op == dpkt.arp.ARP_OP_REVREQUEST: + arp_data.append('arp protocol address request: target IP = ') + arp_data.append(socket.inet_ntoa(arp.tpa)) + stream_object.protocol_data = u' '.join(arp_data) + + elif arp.op == dpkt.arp.ARP_OP_REVREPLY: + arp_data.append('arp protocol address reply: target IP = ') + arp_data.append(socket.inet_ntoa(arp.tpa)) + arp_data.append(' target MAC = ') + arp_data.append(binascii.hexlify(arp.tha)) + stream_object.protocol_data = u' '.join(arp_data) + + elif ether.type == dpkt.ethernet.ETH_TYPE_IP6: + ip6 = ether.data + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ip6.src), + binascii.hexlify(ip6.dst), 'IPv6') + stream_object.protocol_data = 'IPv6' + + elif ether.type == dpkt.ethernet.ETH_TYPE_CDP: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'CDP') + stream_object.protocol_data = 'CDP' + + elif ether.type == dpkt.ethernet.ETH_TYPE_DTP: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'DTP') + stream_object.protocol_data = 'DTP' + + elif ether.type == dpkt.ethernet.ETH_TYPE_REVARP: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'RARP') + stream_object.protocol_data = 'Reverse ARP' + + elif ether.type == dpkt.ethernet.ETH_TYPE_8021Q: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), '8021Q packet') + stream_object.protocol_data = '8021Q packet' + + elif ether.type == dpkt.ethernet.ETH_TYPE_IPX: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'IPX') + stream_object.protocol_data = 'IPX' + + elif ether.type == dpkt.ethernet.ETH_TYPE_PPP: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'PPP') + stream_object.protocol_data = 'PPP' + + elif ether.type == dpkt.ethernet.ETH_TYPE_MPLS: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'MPLS') + stream_object.protocol_data = 'MPLS' + + elif ether.type == dpkt.ethernet.ETH_TYPE_MPLS_MCAST: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'MPLS') + stream_object.protocol_data = 'MPLS MCAST' + + elif ether.type == dpkt.ethernet.ETH_TYPE_PPPoE_DISC: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + binascii.hexlify(ether.dst), 'PPOE') + stream_object.protocol_data = 'PPoE Disc packet' + + elif ether.type == dpkt.ethernet.ETH_TYPE_PPPoE: + stream_object = Stream( + packet_values, ether.data, binascii.hexlify(ether.src), + 
+ binascii.hexlify(ether.dst), 'PPPoE')
+ stream_object.protocol_data = 'PPPoE'
+
+ elif ether.type == 0x2452:
+ stream_object = Stream(
+ packet_values, ether.data, binascii.hexlify(ether.src),
+ binascii.hexlify(ether.dst), '802.11')
+ stream_object.protocol_data = '802.11'
+
+ return stream_object
+
+ def _ParseOtherStreams(self, other_list, trunc_list):
+ """Process PCAP packets that are not IP packets.
+
+ For all packets that were not IP packets, create stream containers
+ depending on the type of packet.
+
+ Args:
+ other_list: List of non-IP packets.
+ trunc_list: A list of packets that truncated strangely and could
+ not be turned into a stream.
+
+ Returns:
+ A list of stream objects (instances of Stream).
+ """
+ other_streams = []
+
+ for packet_values in other_list:
+ stream_object = self._ParseOtherPacket(packet_values)
+ if stream_object:
+ other_streams.append(stream_object)
+
+ for packet_values in trunc_list:
+ ip_packet = packet_values[2]
+
+ source_ip_address = socket.inet_ntoa(ip_packet.src)
+ destination_ip_address = socket.inet_ntoa(ip_packet.dst)
+ stream_object = Stream(
+ packet_values, ip_packet.data, source_ip_address,
+ destination_ip_address, 'BAD')
+ stream_object.protocol_data = 'Bad truncated IP packet'
+ other_streams.append(stream_object)
+
+ return other_streams
+
+ def Parse(self, parser_context, file_entry, parser_chain=None):
+ """Parses a PCAP file.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_entry: A file entry object (instance of dfvfs.FileEntry).
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ """
+ file_object = file_entry.GetFileObject()
+ self.ParseFileObject(
+ parser_context, file_object, file_entry=file_entry,
+ parser_chain=parser_chain)
+ file_object.close()
+
+ def ParseFileObject(
+ self, parser_context, file_object, file_entry=None, parser_chain=None):
+ """Parses a PCAP file.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_object: A file-like object.
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+ The default is None.
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+
+ Raises:
+ UnableToParseFile: when the file cannot be parsed.
+ """
+ data = file_object.read(dpkt.pcap.FileHdr.__hdr_len__)
+
+ try:
+ file_header = dpkt.pcap.FileHdr(data)
+ packet_header_class = dpkt.pcap.PktHdr
+
+ except (dpkt.NeedData, dpkt.UnpackError) as exception:
+ raise errors.UnableToParseFile(
+ u'[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format(
+ self.NAME, file_entry.name, exception))
+
+ if file_header.magic == dpkt.pcap.PMUDPCT_MAGIC:
+ try:
+ file_header = dpkt.pcap.LEFileHdr(data)
+ packet_header_class = dpkt.pcap.LEPktHdr
+
+ except (dpkt.NeedData, dpkt.UnpackError) as exception:
+ raise errors.UnableToParseFile(
+ u'[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format(
+ self.NAME, file_entry.name, exception))
+
+ elif file_header.magic != dpkt.pcap.TCPDUMP_MAGIC:
+ raise errors.UnableToParseFile(u'Unsupported file signature')
+
+ # Add ourselves to the parser chain, which will be used in all subsequent
+ # event creation in this parser.
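+ # (Illustrative note: the chain is a path of parser and plugin names;
+ # for a top-level run of this parser it would presumably be just 'pcap'.)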
+ parser_chain = self._BuildParserChain(parser_chain) + + packet_number = 1 + connections = {} + other_list = [] + trunc_list = [] + + data = file_object.read(dpkt.pcap.PktHdr.__hdr_len__) + while data: + packet_header = packet_header_class(data) + timestamp = packet_header.tv_sec + (packet_header.tv_usec / 1000000.0) + packet_data = file_object.read(packet_header.caplen) + + ethernet_frame = dpkt.ethernet.Ethernet(packet_data) + + if ethernet_frame.type == dpkt.ethernet.ETH_TYPE_IP: + self._ParseIPPacket( + connections, trunc_list, packet_number, timestamp, + len(ethernet_frame), ethernet_frame.data) + + else: + packet_values = [ + timestamp, packet_number, ethernet_frame, len(ethernet_frame)] + other_list.append(packet_values) + + packet_number += 1 + data = file_object.read(dpkt.pcap.PktHdr.__hdr_len__) + + other_streams = self._ParseOtherStreams(other_list, trunc_list) + + for stream_object in sorted( + connections.values(), key=operator.attrgetter('start_time')): + + if not stream_object.protocol == 'ICMP': + stream_object.Clean() + + event_objects = [ + PcapEvent( + min(stream_object.timestamps), + eventdata.EventTimestamp.START_TIME, stream_object), + PcapEvent( + max(stream_object.timestamps), + eventdata.EventTimestamp.END_TIME, stream_object)] + + parser_context.ProduceEvents( + event_objects, parser_chain=parser_chain, file_entry=file_entry) + + for stream_object in other_streams: + event_objects = [ + PcapEvent( + min(stream_object.timestamps), + eventdata.EventTimestamp.START_TIME, stream_object), + PcapEvent( + max(stream_object.timestamps), + eventdata.EventTimestamp.END_TIME, stream_object)] + parser_context.ProduceEvents( + event_objects, parser_chain=parser_chain, file_entry=file_entry) + + +manager.ParsersManager.RegisterParser(PcapParser) diff --git a/plaso/parsers/pcap_test.py b/plaso/parsers/pcap_test.py new file mode 100644 index 0000000..7a03b03 --- /dev/null +++ b/plaso/parsers/pcap_test.py @@ -0,0 +1,117 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors.. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the PCAP parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import pcap as pcap_formatter +from plaso.parsers import pcap +from plaso.parsers import test_lib + + +class PcapParserTest(test_lib.ParserTestCase): + """Tests for the PCAP parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = pcap.PcapParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['test.pcap']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # PCAP information: + # Number of streams: 96 (TCP: 47, UDP: 39, ICMP: 0, Other: 10) + # + # For each stream 2 event objects are generated one for the start + # and one for the end time. 
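+ # That is 96 streams x 2 event objects per stream = 192 event objects.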
+
+ self.assertEquals(len(event_objects), 192)
+
+ # Test stream 3 (event object 6).
+ # Protocol: TCP
+ # Source IP: 192.168.195.130
+ # Dest IP: 63.245.217.43
+ # Source Port: 1038
+ # Dest Port: 443
+ # Stream Type: SSL
+ # Starting Packet: 4
+ # Ending Packet: 6
+
+ event_object = event_objects[6]
+ self.assertEquals(event_object.packet_count, 3)
+ self.assertEquals(event_object.protocol, u'TCP')
+ self.assertEquals(event_object.source_ip, u'192.168.195.130')
+ self.assertEquals(event_object.dest_ip, u'63.245.217.43')
+ self.assertEquals(event_object.dest_port, 443)
+ self.assertEquals(event_object.source_port, 1038)
+ self.assertEquals(event_object.stream_type, u'SSL')
+ self.assertEquals(event_object.first_packet_id, 4)
+ self.assertEquals(event_object.last_packet_id, 6)
+
+ # Test stream 6 (event object 12).
+ # Protocol: UDP
+ # Source IP: 192.168.195.130
+ # Dest IP: 192.168.195.2
+ # Source Port: 55679
+ # Dest Port: 53
+ # Stream Type: DNS
+ # Starting Packet: 11
+ # Ending Packet: 1307
+ # Protocol Data: DNS Query for wpad.localdomain
+
+ event_object = event_objects[12]
+ self.assertEquals(event_object.packet_count, 5)
+ self.assertEquals(event_object.protocol, u'UDP')
+ self.assertEquals(event_object.source_ip, u'192.168.195.130')
+ self.assertEquals(event_object.dest_ip, u'192.168.195.2')
+ self.assertEquals(event_object.dest_port, 53)
+ self.assertEquals(event_object.source_port, 55679)
+ self.assertEquals(event_object.stream_type, u'DNS')
+ self.assertEquals(event_object.first_packet_id, 11)
+ self.assertEquals(event_object.last_packet_id, 1307)
+ self.assertEquals(
+ event_object.protocol_data, u'DNS Query for wpad.localdomain')
+
+ expected_msg = (
+ u'Source IP: 192.168.195.130 '
+ u'Destination IP: 192.168.195.2 '
+ u'Source Port: 55679 '
+ u'Destination Port: 53 '
+ u'Protocol: UDP '
+ u'Type: DNS '
+ u'Size: 380 '
+ u'Protocol Data: DNS Query for wpad.localdomain '
+ u'Stream Data: \'\\xb8\\x9c\\x01\\x00\\x00\\x01\\x00\\x00\\x00\\x00'
+ u'\\x00\\x00\\x04wpad\\x0blocaldomain\\x00\\x00\\x01\\x00\\x01\\xb8'
+ u'\\x9c\\x01\\x00\\x00\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x04wpa\' '
+ u'First Packet ID: 11 '
+ u'Last Packet ID: 1307 '
+ u'Packet Count: 5')
+ expected_msg_short = (
+ u'Type: DNS '
+ u'First Packet ID: 11')
+
+ self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/plaso/parsers/plist.py b/plaso/parsers/plist.py
new file mode 100644
index 0000000..5a07431
--- /dev/null
+++ b/plaso/parsers/plist.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the Property List (Plist) Parser.
+
+Plaso's engine calls PlistParser when it encounters Plist files to be processed.
+""" + +import binascii +import logging + +from binplist import binplist + +from plaso.lib import errors +from plaso.lib import utils +from plaso.parsers import interface +from plaso.parsers import manager + + +class PlistParser(interface.BasePluginsParser): + """De-serializes and parses plists the event objects are generated by plist. + + The Plaso engine calls parsers by their Parse() method. This parser's + Parse() has GetTopLevel() which deserializes plist files using the binplist + library and calls plugins (PlistPlugin) registered through the + interface by their Process() to produce event objects. + + Plugins are how this parser understands the content inside a plist file, + each plugin holds logic specific to a particular plist file. See the + interface and plist_plugins/ directory for examples of how plist plugins are + implemented. + """ + + NAME = 'plist' + DESCRIPTION = u'Parser for binary and text plist files.' + + _plugin_classes = {} + + def __init__(self): + """Initializes a parser object.""" + super(PlistParser, self).__init__() + self._plugins = PlistParser.GetPluginObjects() + + def GetTopLevel(self, file_object, file_name=''): + """Returns the deserialized content of a plist as a dictionary object. + + Args: + file_object: A file-like object to parse. + file_name: The name of the file-like object. + + Returns: + A dictionary object representing the contents of the plist. + """ + try: + top_level_object = binplist.readPlist(file_object) + except binplist.FormatError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] File is not a plist file: {1:s}'.format( + self.NAME, utils.GetUnicodeString(exception))) + except ( + LookupError, binascii.Error, ValueError, AttributeError) as exception: + raise errors.UnableToParseFile( + u'[{0:s}] Unable to parse XML file, reason: {1:s}'.format( + self.NAME, exception)) + except OverflowError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] Unable to parse: {1:s} with error: {2:s}'.format( + self.NAME, file_name, exception)) + + if not top_level_object: + raise errors.UnableToParseFile( + u'[{0:s}] File is not a plist: {1:s}'.format( + self.NAME, utils.GetUnicodeString(file_name))) + + # Since we are using readPlist from binplist now instead of manually + # opening up the BinarPlist file we loose this option. Keep it commented + # out for now but this needs to be tested a bit more. + # TODO: Re-evaluate if we can delete this or still require it. + #if bpl.is_corrupt: + # logging.warning( + # u'[{0:s}] corruption detected in binary plist: {1:s}'.format( + # self.NAME, file_name)) + + return top_level_object + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Parse and extract values from a plist file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + # TODO: Should we rather query the stats object to get the size here? + file_object = file_entry.GetFileObject() + file_size = file_object.get_size() + + if file_size <= 0: + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] file size: {1:d} bytes is less equal 0.'.format( + self.NAME, file_size)) + + # 50MB is 10x larger than any plist seen to date. 
+ if file_size > 50000000: + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] file size: {1:d} bytes is larger than 50 MB.'.format( + self.NAME, file_size)) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + top_level_object = None + try: + top_level_object = self.GetTopLevel(file_object, file_entry.name) + except errors.UnableToParseFile: + file_object.close() + raise + + if not top_level_object: + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse: {1:s} skipping.'.format( + self.NAME, file_entry.name)) + + file_system = file_entry.GetFileSystem() + plist_name = file_system.BasenamePath(file_entry.name) + + for plugin_object in self._plugins: + try: + plugin_object.Process( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + plist_name=plist_name, top_level=top_level_object) + + except errors.WrongPlistPlugin as exception: + logging.debug(u'[{0:s}] Wrong plugin: {1:s} for: {2:s}'.format( + self.NAME, exception[0], exception[1])) + + file_object.close() + + +manager.ParsersManager.RegisterParser(PlistParser) diff --git a/plaso/parsers/plist_plugins/__init__.py b/plaso/parsers/plist_plugins/__init__.py new file mode 100644 index 0000000..14d448a --- /dev/null +++ b/plaso/parsers/plist_plugins/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each plist related plugin.""" + +from plaso.parsers.plist_plugins import airport +from plaso.parsers.plist_plugins import appleaccount +from plaso.parsers.plist_plugins import bluetooth +from plaso.parsers.plist_plugins import ipod +from plaso.parsers.plist_plugins import install_history +from plaso.parsers.plist_plugins import macuser +from plaso.parsers.plist_plugins import safari +from plaso.parsers.plist_plugins import softwareupdate +from plaso.parsers.plist_plugins import spotlight +from plaso.parsers.plist_plugins import spotlight_volume +from plaso.parsers.plist_plugins import timemachine +from plaso.parsers.plist_plugins import default diff --git a/plaso/parsers/plist_plugins/airport.py b/plaso/parsers/plist_plugins/airport.py new file mode 100644 index 0000000..d2c9ecd --- /dev/null +++ b/plaso/parsers/plist_plugins/airport.py @@ -0,0 +1,63 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the airport plist plugin in Plaso.""" + +from plaso.events import plist_event +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class AirportPlugin(interface.PlistPlugin): + """Plist plugin that extracts WiFi information.""" + + NAME = 'plist_airport' + DESCRIPTION = u'Parser for Airport plist files.' + + PLIST_PATH = 'com.apple.airport.preferences.plist' + PLIST_KEYS = frozenset(['RememberedNetworks']) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extracts relevant Airport entries. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. + """ + for wifi in match['RememberedNetworks']: + description = ( + u'[WiFi] Connected to network: <{0:s}> using security {1:s}').format( + wifi.get('SSIDString', u'no SSID string'), + wifi.get('SecurityType', u'N/A')) + last_connected = wifi.get('LastConnected') + event_object = plist_event.PlistEvent( + u'/RememberedNetworks', u'item', last_connected, description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(AirportPlugin) diff --git a/plaso/parsers/plist_plugins/airport_test.py b/plaso/parsers/plist_plugins/airport_test.py new file mode 100644 index 0000000..0f602cc --- /dev/null +++ b/plaso/parsers/plist_plugins/airport_test.py @@ -0,0 +1,69 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the airport plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import airport +from plaso.parsers.plist_plugins import test_lib + + +class AirportPluginTest(test_lib.PlistPluginTestCase): + """Tests for the airport plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = airport.AirportPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['com.apple.airport.preferences.plist']) + plist_name = 'com.apple.airport.preferences.plist' + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 4) + + timestamps = [] + for event_object in event_objects: + timestamps.append(event_object.timestamp) + expected_timestamps = frozenset([ + 1375144166000000, 1386874984000000, 1386949546000000, + 1386950747000000]) + self.assertTrue(set(timestamps) == expected_timestamps) + + event_object = event_objects[0] + self.assertEqual(event_object.key, u'item') + self.assertEqual(event_object.root, u'/RememberedNetworks') + expected_desc = ( + u'[WiFi] Connected to network: using security ' + u'WPA/WPA2 Personal') + self.assertEqual(event_object.desc, expected_desc) + expected_string = u'/RememberedNetworks/item {0:s}'.format(expected_desc) + expected_short = expected_string[:77] + u'...' + self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/appleaccount.py b/plaso/parsers/plist_plugins/appleaccount.py new file mode 100644 index 0000000..f881aa1 --- /dev/null +++ b/plaso/parsers/plist_plugins/appleaccount.py @@ -0,0 +1,111 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a Apple Account plist plugin in Plaso.""" + +from plaso.events import plist_event +from plaso.lib import errors +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class AppleAccountPlugin(interface.PlistPlugin): + """Basic plugin to extract the apple account information.""" + + NAME = 'plist_appleaccount' + DESCRIPTION = u'Parser for Apple account information plist files.' 
+ + PLIST_PATH = u'com.apple.coreservices.appleidauthenticationinfo' + PLIST_KEYS = frozenset(['AuthCertificates', 'AccessorVersions', 'Accounts']) + + def Process( + self, parser_context, file_entry=None, parser_chain=None, plist_name=None, + top_level=None, **kwargs): + """Check if it is a valid Apple account plist file name. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + plist_name: name of the plist file. + top_level: dictionary with the plist file parsed. + """ + if not plist_name.startswith(self.PLIST_PATH): + raise errors.WrongPlistPlugin(self.NAME, plist_name) + super(AppleAccountPlugin, self).Process( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + plist_name=self.PLIST_PATH, top_level=top_level, **kwargs) + + # Generated events: + # Accounts: account name. + # FirstName: first name associated with the account. + # LastName: family name associate with the account. + # CreationDate: timestamp when the account was configured in the system. + # LastSuccessfulConnect: last time when the account was connected. + # ValidationDate: last time when the account was validated. + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extracts relevant Apple Account entries. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. + """ + root = '/Accounts' + + for name_account, account in match['Accounts'].iteritems(): + general_description = u'{0:s} ({1:s} {2:s})'.format( + name_account, account.get('FirstName', ''), + account.get('LastName', '')) + key = name_account + description = u'Configured Apple account {0:s}'.format( + general_description) + event_object = plist_event.PlistEvent( + root, key, account['CreationDate'], description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if 'LastSuccessfulConnect' in account: + description = u'Connected Apple account {0:s}'.format( + general_description) + event_object = plist_event.PlistEvent( + root, key, account['LastSuccessfulConnect'], description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if 'ValidationDate' in account: + description = u'Last validation Apple account {0:s}'.format( + general_description) + event_object = plist_event.PlistEvent( + root, key, account['ValidationDate'], description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(AppleAccountPlugin) diff --git a/plaso/parsers/plist_plugins/appleaccount_test.py b/plaso/parsers/plist_plugins/appleaccount_test.py new file mode 100644 index 0000000..bd5072f --- /dev/null +++ b/plaso/parsers/plist_plugins/appleaccount_test.py @@ -0,0 +1,79 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Apple account plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import appleaccount +from plaso.parsers.plist_plugins import test_lib + + +class AppleAccountPluginTest(test_lib.PlistPluginTestCase): + """Tests for the Apple account plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = appleaccount.AppleAccountPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + plist_file = (u'com.apple.coreservices.appleidauthenticationinfo.' + u'ABC0ABC1-ABC0-ABC0-ABC0-ABC0ABC1ABC2.plist') + test_file = self._GetTestFilePath([plist_file]) + plist_name = plist_file + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 3) + + timestamps = [] + for event_object in event_objects: + timestamps.append(event_object.timestamp) + expected_timestamps = frozenset([ + 1372106802000000, 1387980032000000, 1387980032000000]) + self.assertTrue(set(timestamps) == expected_timestamps) + + event_object = event_objects[0] + self.assertEqual(event_object.root, u'/Accounts') + self.assertEqual(event_object.key, u'email@domain.com') + expected_desc = ( + u'Configured Apple account email@domain.com (Joaquin Moreno Garijo)') + self.assertEqual(event_object.desc, expected_desc) + expected_string = u'/Accounts/email@domain.com {0:s}'.format(expected_desc) + expected_short = expected_string[:77] + u'...' + self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + event_object = event_objects[1] + expected_desc = (u'Connected Apple account ' + u'email@domain.com (Joaquin Moreno Garijo)') + self.assertEqual(event_object.desc, expected_desc) + + event_object = event_objects[2] + expected_desc = (u'Last validation Apple account ' + u'email@domain.com (Joaquin Moreno Garijo)') + self.assertEqual(event_object.desc, expected_desc) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/bluetooth.py b/plaso/parsers/plist_plugins/bluetooth.py new file mode 100644 index 0000000..dff51ae --- /dev/null +++ b/plaso/parsers/plist_plugins/bluetooth.py @@ -0,0 +1,99 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the Bluetooth plist plugin in Plaso."""
+
+from plaso.events import plist_event
+from plaso.parsers import plist
+from plaso.parsers.plist_plugins import interface
+
+
+class BluetoothPlugin(interface.PlistPlugin):
+ """Basic plugin to extract interesting Bluetooth-related keys."""
+
+ NAME = 'plist_bluetooth'
+ DESCRIPTION = u'Parser for Bluetooth plist files.'
+
+ PLIST_PATH = 'com.apple.bluetooth.plist'
+ PLIST_KEYS = frozenset(['DeviceCache', 'PairedDevices'])
+
+ # LastInquiryUpdate = Device connected via Bluetooth Discovery. Updated
+ # when a device is detected in discovery mode. E.g. BT headphone power
+ # on. Pairing is not required for a device to be discovered and cached.
+ #
+ # LastNameUpdate = When the human name was last set. Usually done only once
+ # during initial setup.
+ #
+ # LastServicesUpdate = Time set when device was polled to determine what it
+ # is. Usually done at setup or manually requested via advanced menu.
+
+ def GetEntries(
+ self, parser_context, file_entry=None, parser_chain=None, match=None,
+ **unused_kwargs):
+ """Extracts relevant Bluetooth entries.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+ The default is None.
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ match: Optional dictionary containing extracted keys from PLIST_KEYS.
+ The default is None.
+ """ + root = '/DeviceCache' + + for device, value in match['DeviceCache'].items(): + name = value.get('Name', '') + if name: + name = u''.join(('Name:', name)) + + if device in match['PairedDevices']: + desc = 'Paired:True {0:s}'.format(name) + key = device + if 'LastInquiryUpdate' in value: + event_object = plist_event.PlistEvent( + root, key, value['LastInquiryUpdate'], desc) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if value.get('LastInquiryUpdate'): + desc = u' '.join(filter(None, ('Bluetooth Discovery', name))) + key = u''.join((device, '/LastInquiryUpdate')) + event_object = plist_event.PlistEvent( + root, key, value['LastInquiryUpdate'], desc) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if value.get('LastNameUpdate'): + desc = u' '.join(filter(None, ('Device Name Set', name))) + key = u''.join((device, '/LastNameUpdate')) + event_object = plist_event.PlistEvent( + root, key, value['LastNameUpdate'], desc) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if value.get('LastServicesUpdate'): + desc = desc = u' '.join(filter(None, ('Services Updated', name))) + key = ''.join((device, '/LastServicesUpdate')) + event_object = plist_event.PlistEvent( + root, key, value['LastServicesUpdate'], desc) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(BluetoothPlugin) diff --git a/plaso/parsers/plist_plugins/bluetooth_test.py b/plaso/parsers/plist_plugins/bluetooth_test.py new file mode 100644 index 0000000..fd79baa --- /dev/null +++ b/plaso/parsers/plist_plugins/bluetooth_test.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Bluetooth plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import bluetooth +from plaso.parsers.plist_plugins import test_lib + + +class TestBtPlugin(test_lib.PlistPluginTestCase): + """Tests for the Bluetooth plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = bluetooth.BluetoothPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['plist_binary']) + plist_name = 'com.apple.bluetooth.plist' + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 14) + + paired_event_objects = [] + timestamps = [] + for event_object in event_objects: + timestamps.append(event_object.timestamp) + if event_object.desc.startswith(u'Paired'): + paired_event_objects.append(event_object) + + # Ensure all 14 events and times from the plist are parsed correctly. + self.assertEquals(len(timestamps), 14) + + expected_timestamps = frozenset([ + 1341957896010535, 1341957896010535, 1350666385239661, 1350666391557044, + 1341957900020116, 1302199013524275, 1301012201414766, 1351818797324095, + 1351818797324095, 1351819298997672, 1351818803000000, 1351827808261762, + 1345251268370453, 1345251192528750]) + + self.assertTrue(set(timestamps) == expected_timestamps) + + # Ensure two paired devices are matched. + self.assertEquals(len(paired_event_objects), 2) + + # One of the paired event object descriptions should contain the string: + # Paired:True Name:Apple Magic Trackpad 2. + paired_descriptions = [ + event_object.desc for event_object in paired_event_objects] + + self.assertTrue( + 'Paired:True Name:Apple Magic Trackpad 2' in paired_descriptions) + + expected_string = ( + u'/DeviceCache/44-00-00-00-00-04 ' + u'Paired:True ' + u'Name:Apple Magic Trackpad 2') + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/default.py b/plaso/parsers/plist_plugins/default.py new file mode 100644 index 0000000..23dac2b --- /dev/null +++ b/plaso/parsers/plist_plugins/default.py @@ -0,0 +1,86 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains a default plist plugin in Plaso.""" + +import datetime +import logging + +from plaso.events import plist_event +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +class DefaultPlugin(interface.PlistPlugin): + """Basic plugin to extract keys with timestamps as values from plists.""" + + NAME = 'plist_default' + DESCRIPTION = u'Parser for plist files.' + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, top_level=None, + **unused_kwargs): + """Simple method to exact date values from a Plist. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + top_level: Plist in dictionary form. + """ + for root, key, value in interface.RecurseKey(top_level): + if isinstance(value, datetime.datetime): + event_object = plist_event.PlistEvent(root, key, value) + parser_context.ProduceEvent( + event_object, file_entry=file_entry, parser_chain=parser_chain) + + # TODO: Binplist keeps a list of offsets but not mapped to a key. + # adjust code when there is a way to map keys to offsets. + + # TODO: move this into the parser as with the olecf plugins. + def Process( + self, parser_context, file_entry=None, parser_chain=None, plist_name=None, + top_level=None, **kwargs): + """Overwrite the default Process function so it always triggers. + + Process() checks if the current plist being processed is a match for a + plugin by comparing the PATH and KEY requirements defined by a plugin. If + both match processing continues; else raise WrongPlistPlugin. + + The purpose of the default plugin is to always trigger on any given plist + file, thus it needs to overwrite the default behavior of comparing PATH + and KEY. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + plist_name: Name of the plist file. + top_level: Plist in dictionary form. + """ + logging.debug(u'Plist {0:s} plugin used for: {1:s}'.format( + self.NAME, plist_name)) + self.GetEntries( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + top_level=top_level, **kwargs) + + +plist.PlistParser.RegisterPlugin(DefaultPlugin) diff --git a/plaso/parsers/plist_plugins/default_test.py b/plaso/parsers/plist_plugins/default_test.py new file mode 100644 index 0000000..cb595c3 --- /dev/null +++ b/plaso/parsers/plist_plugins/default_test.py @@ -0,0 +1,110 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the default plist plugin.""" + +import datetime +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.lib import timelib_test +from plaso.parsers.plist_plugins import default +from plaso.parsers.plist_plugins import test_lib + +import pytz + + +class TestDefaultPlist(test_lib.PlistPluginTestCase): + """Tests for the default plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = default.DefaultPlugin() + + def testProcessSingle(self): + """Tests Process on a plist containing a root, value and timestamp.""" + top_level_dict_single = { + 'DE-00-AD-00-BE-EF': { + 'Name': 'DBF Industries Slideshow Lazer', 'LastUsed': + datetime.datetime( + 2012, 11, 2, 1, 21, 38, 997672, tzinfo=pytz.utc)}} + + event_object_generator = self._ParsePlistWithPlugin( + self._plugin, 'single', top_level_dict_single) + event_objects = self._GetEventObjectsFromQueue(event_object_generator) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-11-02 01:21:38.997672') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.root, u'/DE-00-AD-00-BE-EF') + self.assertEquals(event_object.key, u'LastUsed') + + expected_string = ( + u'/DE-00-AD-00-BE-EF/LastUsed') + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + def testProcessMulti(self): + """Tests Process on a plist containing five keys with date values.""" + top_level_dict_many_keys = { + 'DeviceCache': { + '44-00-00-00-00-04': { + 'Name': 'Apple Magic Trackpad 2', 'LMPSubversion': 796, + 'LMPVersion': 3, 'PageScanMode': 0, 'ClassOfDevice': 9620, + 'SupportedFeatures': '\x00\x00\x00\x00', 'Manufacturer': 76, + 'PageScanPeriod': 0, 'ClockOffset': 17981, 'LastNameUpdate': + datetime.datetime( + 2012, 11, 2, 1, 21, 38, 997672, tzinfo=pytz.utc), + 'InquiryRSSI': 198, 'PageScanRepetitionMode': 1, + 'LastServicesUpdate': + datetime.datetime(2012, 11, 2, 1, 13, 23, tzinfo=pytz.utc), + 'displayName': 'Apple Magic Trackpad 2', 'LastInquiryUpdate': + datetime.datetime( + 2012, 11, 2, 1, 13, 17, 324095, tzinfo=pytz.utc), + 'Services': '', 'BatteryPercent': 0.61}, + '44-00-00-00-00-02': { + 'Name': 'test-macpro', 'ClockOffset': 28180, 'ClassOfDevice': + 3670276, 'PageScanMode': 0, 'LastNameUpdate': + datetime.datetime( + 2011, 4, 7, 17, 56, 53, 524275, tzinfo=pytz.utc), + 'PageScanPeriod': 2, 'PageScanRepetitionMode': 1, + 'LastInquiryUpdate': + datetime.datetime( + 2012, 7, 10, 22, 5, 0, 20116, tzinfo=pytz.utc)}}} + + event_queue_consumer = self._ParsePlistWithPlugin( + self._plugin, 'nested', top_level_dict_many_keys) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-04-07 17:56:53.524275') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.root, u'/DeviceCache/44-00-00-00-00-02') + self.assertEquals(event_object.key, u'LastNameUpdate') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/install_history.py b/plaso/parsers/plist_plugins/install_history.py new file mode 100644 index 0000000..c66f450 --- /dev/null +++ b/plaso/parsers/plist_plugins/install_history.py @@ -0,0 +1,67 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# 
+# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the install history plist plugin in Plaso.""" + +from plaso.events import plist_event +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class InstallHistoryPlugin(interface.PlistPlugin): + """Plist plugin that extracts the installation history.""" + + NAME = 'plist_install_history' + DESCRIPTION = u'Parser for installation history plist files.' + + PLIST_PATH = 'InstallHistory.plist' + PLIST_KEYS = frozenset([ + 'date', 'displayName', 'displayVersion', + 'processName', 'packageIdentifiers']) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, top_level=None, + **unused_kwargs): + """Extracts relevant install history entries. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + top_level: Optional plist in dictionary form. The default is None. + """ + for entry in top_level: + packages = [] + for package in entry.get('packageIdentifiers'): + packages.append(package) + description = ( + u'Installation of [{0:s} {1:s}] using [{2:s}]. ' + u'Packages: {3:s}.').format( + entry.get('displayName'), entry.get('displayVersion'), + entry.get('processName'), u', '.join(packages)) + event_object = plist_event.PlistEvent( + u'/item', u'', entry.get('date'), description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(InstallHistoryPlugin) diff --git a/plaso/parsers/plist_plugins/install_history_test.py b/plaso/parsers/plist_plugins/install_history_test.py new file mode 100644 index 0000000..fd0a6e7 --- /dev/null +++ b/plaso/parsers/plist_plugins/install_history_test.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
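+
+# Illustrative sketch (values invented, not taken from the test data) of the
+# list-of-dicts top level that InstallHistoryPlugin.GetEntries() above
+# iterates over; the key names follow the plugin's PLIST_KEYS:
+#
+#   top_level = [
+#       {'date': datetime.datetime(2013, 12, 28, 4, 35, 47),
+#        'displayName': 'OS X',
+#        'displayVersion': '10.9',
+#        'processName': 'OS X Installer',
+#        'packageIdentifiers': ['com.apple.pkg.BaseSystemBinaries']}]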
+"""Tests for the install history plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import install_history +from plaso.parsers.plist_plugins import test_lib + + +class InstallHistoryPluginTest(test_lib.PlistPluginTestCase): + """Tests for the install history plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = install_history.InstallHistoryPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['InstallHistory.plist']) + plist_name = 'InstallHistory.plist' + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 7) + + timestamps = [] + for event_object in event_objects: + timestamps.append(event_object.timestamp) + expected_timestamps = frozenset([ + 1384225175000000, 1388205491000000, 1388232883000000, 1388232883000000, + 1388232883000000, 1388232883000000, 1390941528000000]) + self.assertTrue(set(timestamps) == expected_timestamps) + + event_object = event_objects[0] + self.assertEqual(event_object.key, u'') + self.assertEqual(event_object.root, u'/item') + expected_desc = ( + u'Installation of [OS X 10.9 (13A603)] using [OS X Installer]. ' + u'Packages: com.apple.pkg.BaseSystemBinaries, ' + u'com.apple.pkg.BaseSystemResources, ' + u'com.apple.pkg.Essentials, com.apple.pkg.BSD, ' + u'com.apple.pkg.JavaTools, com.apple.pkg.AdditionalEssentials, ' + u'com.apple.pkg.AdditionalSpeechVoices, ' + u'com.apple.pkg.AsianLanguagesSupport, com.apple.pkg.MediaFiles, ' + u'com.apple.pkg.JavaEssentials, com.apple.pkg.OxfordDictionaries, ' + u'com.apple.pkg.X11redirect, com.apple.pkg.OSInstall, ' + u'com.apple.pkg.update.compatibility.2013.001.') + self.assertEqual(event_object.desc, expected_desc) + expected_string = u'/item/ {}'.format(expected_desc) + expected_short = expected_string[:77] + u'...' + self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/interface.py b/plaso/parsers/plist_plugins/interface.py new file mode 100644 index 0000000..1dd20ec --- /dev/null +++ b/plaso/parsers/plist_plugins/interface.py @@ -0,0 +1,323 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plist_interface contains basic interface for plist plugins within Plaso. + +Plist files are only one example of a type of object that the Plaso tool is +expected to encounter and process. There can be and are many other parsers +which are designed to process specific data types. 
+
+PlistPlugin defines the attributes necessary for registration, discovery
+and operation of plugins for plist files which will be used by PlistParser.
+"""
+
+import abc
+import logging
+
+from plaso.lib import errors
+from plaso.parsers import plugins
+
+
+class PlistPlugin(plugins.BasePlugin):
+  """This is an abstract class from which plugins should be based.
+
+  The following are the attributes and methods expected to be overridden by a
+  plugin.
+
+  Attributes:
+    PLIST_PATH - string of the filename the plugin is designed to process.
+    PLIST_KEYS - list of keys holding values that are necessary for processing.
+
+  Please note, PLIST_KEYS is cAse sensitive; for a plugin to match, the plist
+  file needs to contain at minimum the keys needed for processing, otherwise
+  WrongPlistPlugin is raised.
+
+  For example, if a plist file contains the following keys,
+  {'foo': 1, 'bar': 2, 'opt': 3} with 'foo' and 'bar' being keys critical to
+  processing, define PLIST_KEYS as ['foo', 'bar']. If 'opt' is only optionally
+  defined it can still be accessed by manually processing self.top_level from
+  the plugin.
+
+  Methods:
+    GetEntries() - extracts and formats info from keys and yields
+    event.PlistEvent.
+  """
+
+  NAME = 'plist_plugin'
+
+  # PLIST_PATH is a string for the filename this parser is designed to process.
+  # This is expected to be overridden by the processing plugin.
+  # Ex. 'com.apple.bluetooth.plist'
+  PLIST_PATH = 'any'
+
+  # PLIST_KEYS is a list of keys required by a plugin.
+  # This is expected to be overridden by the processing plugin.
+  # Ex. frozenset(['DeviceCache', 'PairedDevices'])
+  PLIST_KEYS = frozenset(['any'])
+
+  # This is expected to be overridden by the processing plugin.
+  # URLS should contain a list of URLs with additional information about
+  # this key or value.
+  # Ex. ['http://www.forensicswiki.org/wiki/Property_list_(plist)']
+  URLS = []
+
+  @abc.abstractmethod
+  def GetEntries(
+      self, parser_context, file_entry=None, parser_chain=None, top_level=None,
+      match=None, **unused_kwargs):
+    """Extracts event objects from the values of entries within a plist.
+
+    This is the main method that a plist plugin needs to implement.
+
+    The contents of the plist keys defined in PLIST_KEYS will be made available
+    to the plugin in the match dictionary, e.g. match = {'KEY': 'value'}. The
+    plugin should implement logic to parse this into a useful event for
+    incorporation into the Plaso timeline.
+
+    For example, if you want to note the timestamps of when devices were
+    LastInquiryUpdated you would need to examine the bluetooth config file
+    called 'com.apple.bluetooth' and need to look at devices under the key
+    'DeviceCache'. To do this the plugin needs to define
+    PLIST_PATH = 'com.apple.bluetooth' and PLIST_KEYS =
+    frozenset(['DeviceCache']). IMPORTANT: this interface requires exact names
+    and is case sensitive. A unit test based on a real world file is expected
+    for each plist plugin.
+
+    When a file with these keys is encountered during processing, match is
+    populated and the plugin's GetEntries() is called. The plugin would have
+    match = {'DeviceCache': {'DE:AD:BE:EF:01': {'LastInquiryUpdate':
+    DateTime_Object}, 'DE:AD:BE:EF:02': {'LastInquiryUpdate':
+    DateTime_Object}, ...}} and needs to implement logic here to extract
+    values, format, and produce the data as an event.PlistEvent.
+
+    The attributes for a PlistEvent should include the following:
+      root = Root key this event was extracted from. E.g. DeviceCache/
+      key = Key the value resided in. E.g. 'DE:AD:BE:EF:01'
+      time = Date this artifact was created in microseconds (usec) from epoch.
+      desc = Short description. E.g. 'Device LastInquiryUpdated'
+
+    See plist/bluetooth.py for the implemented example plugin.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      top_level: Optional plist in dictionary form. The default is None.
+      match: Optional dictionary containing extracted keys from PLIST_KEYS.
+             The default is None.
+    """
+
+  def Process(
+      self, parser_context, file_entry=None, parser_chain=None, plist_name=None,
+      top_level=None, **kwargs):
+    """Determine if this is the correct plugin; if so proceed with processing.
+
+    Process() checks if the current plist being processed is a match for a
+    plugin by comparing the PATH and KEY requirements defined by a plugin. If
+    both match, processing continues; otherwise WrongPlistPlugin is raised.
+
+    This function also extracts the required keys as defined in
+    self.PLIST_KEYS from the plist, stores the result in the match dictionary
+    and passes it to self.GetEntries(), which holds the processing logic
+    implemented by the plugin.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      plist_name: Name of the plist file.
+      top_level: Plist in dictionary form.
+
+    Raises:
+      WrongPlistPlugin: If this plugin is not able to process the given file.
+      ValueError: If top_level or plist_name are not set.
+    """
+    if plist_name is None or top_level is None:
+      raise ValueError(u'Top level or plist name are not set.')
+
+    if plist_name.lower() != self.PLIST_PATH.lower():
+      raise errors.WrongPlistPlugin(self.NAME, plist_name)
+
+    if isinstance(top_level, dict):
+      if not set(top_level.keys()).issuperset(self.PLIST_KEYS):
+        raise errors.WrongPlistPlugin(self.NAME, plist_name)
+
+    else:
+      # Make sure we are getting back an object that has an iterator.
+      if not hasattr(top_level, '__iter__'):
+        raise errors.WrongPlistPlugin(self.NAME, plist_name)
+
+      # This is a list and we need to just look at the first level
+      # of keys there.
+      keys = []
+      for top_level_entry in top_level:
+        if isinstance(top_level_entry, dict):
+          keys.extend(top_level_entry.keys())
+
+      # Compare as a set, which removes possible duplicate entries
+      # from the list.
+      if not set(keys).issuperset(self.PLIST_KEYS):
+        raise errors.WrongPlistPlugin(self.NAME, plist_name)
+
+    # This will raise if unhandled keyword arguments are passed.
+    super(PlistPlugin, self).Process(parser_context, **kwargs)
+
+    logging.debug(u'Plist Plugin Used: {0:s} for: {1:s}'.format(
+        self.NAME, plist_name))
+    match = GetKeys(top_level, self.PLIST_KEYS)
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    self.GetEntries(
+        parser_context, file_entry=file_entry, parser_chain=parser_chain,
+        top_level=top_level, match=match)
+
+
+def RecurseKey(recur_item, root='', depth=15):
+  """Flattens nested dictionaries and lists by yielding their values.
+
+  The hierarchy of a plist file is a series of nested dictionaries and lists.
+  This helper function lets plugins navigate the structure without having to
+  reimplement their own recursive methods.
+
+  This method implements an overridable depth limit to prevent processing
+  extremely deeply nested plists. If the limit is reached a debug message is
+  logged indicating which key processing stopped on.
+
+  Example Input Plist:
+    recur_item = { DeviceRoot: { DeviceMAC1: [Value1, Value2, Value3],
+                                 DeviceMAC2: [Value1, Value2, Value3]}}
+
+  Example Output:
+    ('', DeviceRoot, {DeviceMACs...})
+    (DeviceRoot, DeviceMAC1, [Value1, Value2, Value3])
+    (DeviceRoot, DeviceMAC2, [Value1, Value2, Value3])
+
+  Args:
+    recur_item: An object to be checked for additional nested items.
+    root: The pathname of the current working key.
+    depth: A counter to ensure we stop at the maximum recursion depth.
+
+  Yields:
+    A tuple of the root, key, and value from a plist.
+  """
+  if depth < 1:
+    logging.debug(u'Recursion limit hit for key: {0:s}'.format(root))
+    return
+
+  if type(recur_item) in (list, tuple):
+    for recur in recur_item:
+      for key in RecurseKey(recur, root, depth):
+        yield key
+    return
+
+  if not hasattr(recur_item, 'iteritems'):
+    return
+
+  for key, value in recur_item.iteritems():
+    yield root, key, value
+    if isinstance(value, dict):
+      value = [value]
+    if isinstance(value, list):
+      for item in value:
+        if isinstance(item, dict):
+          for keyval in RecurseKey(
+              item, root=root + u'/' + key, depth=depth - 1):
+            yield keyval
+
+
+def GetKeys(top_level, keys, depth=1):
+  """Helper function to return keys nested in a plist dict.
+
+  By default this function will return the values for the named keys requested
+  by a plugin in a match dictionary object. The default setting is to look
+  a single layer down from the root (same as the check for plugin
+  applicability). This level is suitable for most cases.
+
+  For cases where there is variability in the name at the first level
+  (e.g. it is the MAC address of a device, or a UUID) it is possible to
+  override the depth limit and use GetKeys to fetch from a deeper level.
+
+  E.g.
+    Top_Level (root):                                          # depth = 0
+    |-- Key_Name_is_UUID_Generated_At_Install 1234-5678-8      # depth = 1
+    |   |-- Interesting_SubKey_with_value_to_Process: [Values, ...]
+    |                                                          # depth = 2
+
+  Args:
+    top_level: Plist in dictionary form.
+    keys: A list of keys that should be returned.
+    depth: Defines how many levels deep to check for a match.
+
+  Returns:
+    A dictionary with just the keys requested or an empty dict if the plist
+    is flat, e.g. top_level is a list instead of a dict object.
+  """
+  match = {}
+  if not isinstance(top_level, dict):
+    # Return an empty dict here if top_level is a list object, which happens
+    # if the plist file is flat.
+    return match
+  keys = set(keys)
+
+  if depth == 1:
+    for key in keys:
+      match[key] = top_level.get(key, None)
+  else:
+    for _, parsed_key, parsed_value in RecurseKey(top_level, depth=depth):
+      if parsed_key in keys:
+        match[parsed_key] = parsed_value
+        if set(match.keys()) == keys:
+          return match
+  return match
+
+
+def GetKeysDefaultEmpty(top_level, keys, depth=1):
+  """Return keys nested in a plist dict, omitting keys that are not set.
+
+  The function GetKeys includes a requested key with a value of None when the
+  key is not present in the plist. This alternate function behaves the same
+  way as GetKeys except that missing keys are left out of the result instead
+  of being included with an empty value.
+
+  Args:
+    top_level: Plist in dictionary form.
+    keys: A list of keys that should be returned.
+ depth: Defines how many levels deep to check for a match. + + Returns: + A dictionary with just the keys requested. + """ + keys = set(keys) + match = {} + + if depth == 1: + for key in keys: + value = top_level.get(key, None) + if value is not None: + match[key] = value + else: + for _, parsed_key, parsed_value in RecurseKey(top_level, depth=depth): + if parsed_key in keys: + match[parsed_key] = parsed_value + if set(match.keys()) == keys: + return match + return match diff --git a/plaso/parsers/plist_plugins/interface_test.py b/plaso/parsers/plist_plugins/interface_test.py new file mode 100644 index 0000000..8355055 --- /dev/null +++ b/plaso/parsers/plist_plugins/interface_test.py @@ -0,0 +1,135 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the plist plugin interface.""" + +import unittest + +from plaso.events import plist_event +from plaso.lib import errors +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface +from plaso.parsers.plist_plugins import test_lib + + +class MockPlugin(interface.PlistPlugin): + """Mock plugin.""" + + NAME = 'mock_plist_plugin' + DESCRIPTION = u'Parser for testing parsing plist files.' + + PLIST_PATH = 'plist_binary' + PLIST_KEYS = frozenset(['DeviceCache', 'PairedDevices']) + + def GetEntries(self, parser_context, **unused_kwargs): + event_object = plist_event.PlistEvent( + u'/DeviceCache/44-00-00-00-00-00', u'LastInquiryUpdate', + 1351827808261762) + parser_context.ProduceEvent(event_object, parser_chain=self.NAME) + + +class TestPlistPlugin(test_lib.PlistPluginTestCase): + """Tests for the plist plugin interface.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._top_level_dict = { + 'DeviceCache': { + '44-00-00-00-00-04': { + 'Name': 'Apple Magic Trackpad 2', 'LMPSubversion': 796, + 'Services': '', 'BatteryPercent': 0.61}, + '44-00-00-00-00-02': { + 'Name': 'test-macpro', 'ClockOffset': 28180, + 'PageScanPeriod': 2, 'PageScanRepetitionMode': 1}}} + + def testGetPluginNames(self): + """Tests the GetPluginNames function.""" + plugin_names = plist.PlistParser.GetPluginNames() + + self.assertNotEquals(plugin_names, []) + + self.assertTrue('plist_default' in plugin_names) + + def testProcess(self): + """Tests the Process function.""" + # Ensure the plugin only processes if both filename and keys exist. + plugin_object = MockPlugin() + + # Test correct filename and keys. + top_level = {'DeviceCache': 1, 'PairedDevices': 1} + event_object_generator = self._ParsePlistWithPlugin( + plugin_object, 'plist_binary', top_level) + event_objects = self._GetEventObjectsFromQueue(event_object_generator) + + self.assertEquals(len(event_objects), 1) + + # Correct filename with odd filename cAsinG. Adding an extra useless key. 
+ top_level = {'DeviceCache': 1, 'PairedDevices': 1, 'R@ndomExtraKey': 1} + event_object_generator = self._ParsePlistWithPlugin( + plugin_object, 'pLiSt_BinAry', top_level) + event_objects = self._GetEventObjectsFromQueue(event_object_generator) + + self.assertEquals(len(event_objects), 1) + + # Test wrong filename. + top_level = {'DeviceCache': 1, 'PairedDevices': 1} + with self.assertRaises(errors.WrongPlistPlugin): + _ = self._ParsePlistWithPlugin( + plugin_object, 'wrong_file.plist', top_level) + + # Test not enough required keys. + top_level = {'Useless_Key': 0, 'PairedDevices': 1} + with self.assertRaises(errors.WrongPlistPlugin): + _ = self._ParsePlistWithPlugin( + plugin_object, 'plist_binary.plist', top_level) + + def testRecurseKey(self): + """Tests the RecurseKey function.""" + # Ensure with a depth of 1 we only return the root key. + result = list(interface.RecurseKey(self._top_level_dict, depth=1)) + self.assertEquals(len(result), 1) + + # Trying again with depth limit of 2 this time. + result = list(interface.RecurseKey(self._top_level_dict, depth=2)) + self.assertEquals(len(result), 3) + + # A depth of two should gives us root plus the two devices. Let's check. + my_keys = [] + for unused_root, key, unused_value in result: + my_keys.append(key) + expected = set(['DeviceCache', '44-00-00-00-00-04', '44-00-00-00-00-02']) + self.assertTrue(expected == set(my_keys)) + + def testGetKeys(self): + """Tests the GetKeys function.""" + # Match DeviceCache from the root level. + key = ['DeviceCache'] + result = interface.GetKeys(self._top_level_dict, key) + self.assertEquals(len(result), 1) + + # Look for a key nested a layer beneath DeviceCache from root level. + # Note: overriding the default depth to look deeper. + key = ['44-00-00-00-00-02'] + result = interface.GetKeys(self._top_level_dict, key, depth=2) + self.assertEquals(len(result), 1) + + # Check the value of the result was extracted as expected. + self.assertTrue('test-macpro' == result[key[0]]['Name']) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/ipod.py b/plaso/parsers/plist_plugins/ipod.py new file mode 100644 index 0000000..4daa1c1 --- /dev/null +++ b/plaso/parsers/plist_plugins/ipod.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a plist plugin for the iPod/iPhone storage plist.""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +class IPodPlistEvent(time_events.PythonDatetimeEvent): + """An event object for an entry in the iPod plist file.""" + + DATA_TYPE = 'ipod:device:entry' + + def __init__(self, datetime_timestamp, device_id, device_info): + """Initialize the event. + + Args: + datetime_timestamp: The timestamp for the event as a datetime object. 
+ device_id: The device ID. + device_info: A dict that contains extracted information from the plist. + """ + super(IPodPlistEvent, self).__init__( + datetime_timestamp, eventdata.EventTimestamp.LAST_CONNECTED) + + self.device_id = device_id + + # Save the other attributes. + for key, value in device_info.iteritems(): + if key == 'Connected': + continue + attribute_name = key.lower().replace(u' ', u'_') + setattr(self, attribute_name, value) + + +class IPodPlugin(interface.PlistPlugin): + """Plugin to extract iPod/iPad/iPhone device information.""" + + NAME = 'ipod_device' + DESCRIPTION = u'Parser for iPod, iPad and iPhone plist files.' + + PLIST_PATH = 'com.apple.iPod.plist' + PLIST_KEYS = frozenset(['Devices']) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extract device information from the iPod plist. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. + """ + if not 'Devices' in match: + return + + devices = match['Devices'] + if not devices: + return + + for device, device_info in devices.iteritems(): + if 'Connected' not in device_info: + continue + event_object = IPodPlistEvent( + device_info.get('Connected'), device, device_info) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(IPodPlugin) diff --git a/plaso/parsers/plist_plugins/ipod_test.py b/plaso/parsers/plist_plugins/ipod_test.py new file mode 100644 index 0000000..addd128 --- /dev/null +++ b/plaso/parsers/plist_plugins/ipod_test.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
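+
+# Illustrative sketch (identifier and values invented) of the 'Devices'
+# match dictionary that IPodPlugin.GetEntries() above consumes. Attribute
+# names are lower-cased and spaces replaced with underscores, so
+# 'Device Class' becomes the event attribute device_class:
+#
+#   match = {'Devices': {
+#       '4C6F6F6E65000000': {
+#           'Connected': datetime.datetime(2013, 10, 9, 19, 27, 54),
+#           'Device Class': 'iPhone',
+#           'Use Count': 1}}}
+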
+"""Tests for the iPod plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import ipod as ipod_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import plist +from plaso.parsers.plist_plugins import ipod +from plaso.parsers.plist_plugins import test_lib + + +class TestIPodPlugin(test_lib.PlistPluginTestCase): + """Tests for the iPod plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = ipod.IPodPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + plist_name = 'com.apple.iPod.plist' + test_file = self._GetTestFilePath([plist_name]) + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 4) + + event_object = event_objects[1] + + timestamp = timelib_test.CopyStringToTimestamp('2013-10-09 19:27:54') + self.assertEquals(event_object.timestamp, timestamp) + + expected_string = ( + u'Device ID: 4C6F6F6E65000000 Type: iPhone [10016] Connected 1 times ' + u'Serial nr: 526F676572 IMEI [012345678901234]') + + self._TestGetMessageStrings( + event_object, expected_string, expected_string[0:77] + '...') + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_CONNECTED) + + self.assertEquals(event_object.device_class, u'iPhone') + self.assertEquals(event_object.device_id, u'4C6F6F6E65000000') + self.assertEquals(event_object.firmware_version, 256) + self.assertEquals(event_object.imei, u'012345678901234') + self.assertEquals(event_object.use_count, 1) + + event_object = event_objects[3] + timestamp = timelib_test.CopyStringToTimestamp('1995-11-22 18:25:07') + self.assertEquals(event_object.timestamp, timestamp) + self.assertEquals(event_object.device_id, u'0000A11300000000') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/macuser.py b/plaso/parsers/plist_plugins/macuser.py new file mode 100644 index 0000000..c557678 --- /dev/null +++ b/plaso/parsers/plist_plugins/macuser.py @@ -0,0 +1,172 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Mac OS X user plist plugin.""" + +# TODO: Only plists from Mac OS X 10.8 and 10.9 were tested. Look at other +# versions as well. 
+
+import binascii
+
+from binplist import binplist
+from dfvfs.file_io import fake_file_io
+from dfvfs.path import fake_path_spec
+from dfvfs.resolver import context
+from xml.etree import ElementTree
+
+from plaso.events import plist_event
+from plaso.lib import timelib
+from plaso.parsers import plist
+from plaso.parsers.plist_plugins import interface
+
+
+__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)'
+
+
+class MacUserPlugin(interface.PlistPlugin):
+  """Basic plugin to extract timestamp Mac user information."""
+
+  NAME = 'plist_macuser'
+  DESCRIPTION = u'Parser for Mac OS X user plist files.'
+
+  # The PLIST_PATH is dynamic: "<username>.plist" is named after the
+  # Mac OS X user account.
+  PLIST_KEYS = frozenset([
+      'name', 'uid', 'home',
+      'passwordpolicyoptions', 'ShadowHashData'])
+
+  _ROOT = u'/'
+
+  def Process(
+      self, parser_context, file_entry=None, parser_chain=None, plist_name=None,
+      top_level=None, **kwargs):
+    """Checks if it is a valid Mac OS X system account plist file name.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      plist_name: Name of the plist file.
+      top_level: Dictionary with the parsed plist file.
+    """
+    super(MacUserPlugin, self).Process(
+        parser_context, file_entry=file_entry, parser_chain=parser_chain,
+        plist_name=self.PLIST_PATH, top_level=top_level, **kwargs)
+
+  # Generated events:
+  #   name: string with the system user.
+  #   uid: user ID.
+  #   passwordpolicyoptions: XML plist structures with the timestamps.
+  #     passwordLastSetTime: last time the password was changed.
+  #     lastLoginTimestamp: last time the user was authenticated (*).
+  #     failedLoginTimestamp: last time the user entered an incorrect
+  #       password (*).
+  #     failedLoginCount: number of times an incorrect password was entered.
+  #   (*): depending on the situation, these timestamps are reset (0 value).
+  #   A reset value is translated by the library as 2001-01-01 00:00:00
+  #   (Cocoa zero time representation). If this happens, the event is not
+  #   yielded.
+
+  def GetEntries(
+      self, parser_context, file_entry=None, parser_chain=None, match=None,
+      **unused_kwargs):
+    """Extracts relevant user timestamp entries.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      match: Optional dictionary containing keys extracted from PLIST_KEYS.
+             The default is None.
+    """
+    account = match['name'][0]
+    uid = match['uid'][0]
+    cocoa_zero = (
+        timelib.Timestamp.COCOA_TIME_TO_POSIX_BASE *
+        timelib.Timestamp.MICRO_SECONDS_PER_SECOND)
+    # INFO: binplist returns a string with the plist XML.
+    for policy in match['passwordpolicyoptions']:
+      xml_policy = ElementTree.fromstring(policy)
+      for dict_elements in xml_policy.iterfind('dict'):
+        key_values = [value.text for value in dict_elements.getchildren()]
+        policy_dict = dict(zip(key_values[0::2], key_values[1::2]))
+
+      if policy_dict.get('passwordLastSetTime', 0):
+        timestamp = timelib.Timestamp.FromTimeString(
+            policy_dict.get('passwordLastSetTime', '0'))
+        if timestamp > cocoa_zero:
+          # Extract the hash password information.
+          # It is stored in the attribute ShadowHashData, which is binary
+          # plist data; however binplist only extracts one level of a binary
+          # plist, so it returns this information as a string.
+
+          # TODO: change this into a DataRange instead. For this we
+          # need the file offset and size of the ShadowHashData value data.
+          resolver_context = context.Context()
+          fake_file = fake_file_io.FakeFile(
+              resolver_context, match['ShadowHashData'][0])
+          fake_file.open(path_spec=fake_path_spec.FakePathSpec(
+              location=u'ShadowHashData'))
+
+          try:
+            plist_file = binplist.BinaryPlist(file_obj=fake_file)
+            top_level = plist_file.Parse()
+          except binplist.FormatError:
+            top_level = dict()
+          salted_hash = top_level.get('SALTED-SHA512-PBKDF2', None)
+          if salted_hash:
+            password_hash = u'$ml${0:d}${1:s}${2:s}'.format(
+                salted_hash['iterations'],
+                binascii.hexlify(salted_hash['salt']),
+                binascii.hexlify(salted_hash['entropy']))
+          else:
+            password_hash = u'N/A'
+          description = (
+              u'Last time {0:s} ({1!s}) changed the password: {2!s}').format(
+                  account, uid, password_hash)
+          event_object = plist_event.PlistTimeEvent(
+              self._ROOT, u'passwordLastSetTime', timestamp, description)
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+      if policy_dict.get('lastLoginTimestamp', 0):
+        timestamp = timelib.Timestamp.FromTimeString(
+            policy_dict.get('lastLoginTimestamp', '0'))
+        description = u'Last login from {0:s} ({1!s})'.format(account, uid)
+        if timestamp > cocoa_zero:
+          event_object = plist_event.PlistTimeEvent(
+              self._ROOT, u'lastLoginTimestamp', timestamp, description)
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+      if policy_dict.get('failedLoginTimestamp', 0):
+        timestamp = timelib.Timestamp.FromTimeString(
+            policy_dict.get('failedLoginTimestamp', '0'))
+        description = (
+            u'Last failed login from {0:s} ({1!s}) ({2!s} times)').format(
+                account, uid, policy_dict['failedLoginCount'])
+        if timestamp > cocoa_zero:
+          event_object = plist_event.PlistTimeEvent(
+              self._ROOT, u'failedLoginTimestamp', timestamp, description)
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+plist.PlistParser.RegisterPlugin(MacUserPlugin)
diff --git a/plaso/parsers/plist_plugins/macuser_test.py b/plaso/parsers/plist_plugins/macuser_test.py
new file mode 100644
index 0000000..f056513
--- /dev/null
+++ b/plaso/parsers/plist_plugins/macuser_test.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
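+
+# Illustrative sketch (values invented) of the policy dictionary that
+# MacUserPlugin.GetEntries() above builds from each 'passwordpolicyoptions'
+# XML plist before producing events:
+#
+#   policy_dict = {
+#       'passwordLastSetTime': '2013-12-28T04:35:47Z',
+#       'failedLoginCount': '0'}
+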
+"""Tests for the Mac OS X local users plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.lib import timelib_test +from plaso.parsers import plist +from plaso.parsers.plist_plugins import macuser +from plaso.parsers.plist_plugins import test_lib + + +class MacUserPluginTest(test_lib.PlistPluginTestCase): + """Tests for the Mac OS X local user plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = macuser.MacUserPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + plist_name = u'user.plist' + test_file = self._GetTestFilePath([plist_name]) + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-12-28 04:35:47') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.key, u'passwordLastSetTime') + self.assertEqual(event_object.root, u'/') + expected_desc = ( + u'Last time user (501) changed the password: ' + u'$ml$37313$fa6cac1869263baa85cffc5e77a3d4ee164b7' + u'5536cae26ce8547108f60e3f554$a731dbb0e386b169af8' + u'9fbb33c255ceafc083c6bc5194853f72f11c550c42e4625' + u'ef113b66f3f8b51fc3cd39106bad5067db3f7f1491758ff' + u'e0d819a1b0aba20646fd61345d98c0c9a411bfd1144dd4b' + u'3c40ec0f148b66d5b9ab014449f9b2e103928ef21db6e25' + u'b536a60ff17a84e985be3aa7ba3a4c16b34e0d1d2066ae178') + self.assertEqual(event_object.desc, expected_desc) + expected_string = u'//passwordLastSetTime {}'.format(expected_desc) + expected_short = u'{}...'.format(expected_string[:77]) + self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/safari.py b/plaso/parsers/plist_plugins/safari.py new file mode 100644 index 0000000..f268771 --- /dev/null +++ b/plaso/parsers/plist_plugins/safari.py @@ -0,0 +1,97 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a default plist plugin in Plaso.""" + +import logging + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +class SafariHistoryEvent(time_events.TimestampEvent): + """An EventObject for Safari history entries.""" + + def __init__(self, timestamp, history_entry): + """Initialize the event. + + Args: + timestamp: The timestamp of the Event, in microseconds since Unix Epoch. + history_entry: A dict object read from the Safari history plist. 
+ """ + super(SafariHistoryEvent, self).__init__( + timestamp, eventdata.EventTimestamp.LAST_VISITED_TIME) + self.data_type = 'safari:history:visit' + self.url = history_entry.get('', None) + self.title = history_entry.get('title', None) + display_title = history_entry.get('displayTitle', None) + if display_title != self.title: + self.display_title = display_title + self.visit_count = history_entry.get('visitCount', None) + self.was_http_non_get = history_entry.get('lastVisitWasHTTPNonGet', None) + + +class SafariHistoryPlugin(interface.PlistPlugin): + """Plugin to extract Safari history timestamps.""" + + NAME = 'safari_history' + DESCRIPTION = u'Parser for Safari history plist files.' + + PLIST_PATH = 'History.plist' + PLIST_KEYS = frozenset(['WebHistoryDates', 'WebHistoryFileVersion']) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extracts Safari history items. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. + """ + if match.get('WebHistoryFileVersion', 0) != 1: + logging.warning(u'Unable to parse Safari version: {0:s}'.format( + match.get('WebHistoryFileVersion', 0))) + return + + for history_entry in match.get('WebHistoryDates', {}): + try: + time = timelib.Timestamp.FromCocoaTime(float( + history_entry.get('lastVisitedDate', 0))) + except ValueError: + logging.warning(u'Unable to translate timestamp: {0:s}'.format( + history_entry.get('lastVisitedDate', 0))) + continue + + if not time: + logging.debug('No timestamp set, skipping record.') + continue + + event_object = SafariHistoryEvent(time, history_entry) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(SafariHistoryPlugin) diff --git a/plaso/parsers/plist_plugins/safari_test.py b/plaso/parsers/plist_plugins/safari_test.py new file mode 100644 index 0000000..701834f --- /dev/null +++ b/plaso/parsers/plist_plugins/safari_test.py @@ -0,0 +1,65 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
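+
+# Illustrative sketch (URL and values invented) of a WebHistoryDates entry
+# as consumed by SafariHistoryPlugin.GetEntries() above; note that the URL
+# is stored under the empty string key and lastVisitedDate is a Cocoa
+# timestamp (seconds since 2001-01-01 00:00:00 UTC):
+#
+#   history_entry = {
+#       '': 'http://www.example.com/',
+#       'title': 'Example',
+#       'lastVisitedDate': '394997460',
+#       'visitCount': 1}
+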
+"""Tests for the Safari history plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.lib import timelib_test +from plaso.parsers import plist +from plaso.parsers.plist_plugins import safari +from plaso.parsers.plist_plugins import test_lib + + +class SafariPluginTest(test_lib.PlistPluginTestCase): + """Tests for the Safari history plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = safari.SafariHistoryPlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['History.plist']) + plist_name = 'History.plist' + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # 18 entries in timeline. + self.assertEquals(len(event_objects), 18) + + event_object = event_objects[8] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-08 17:31:00') + self.assertEquals(event_objects[10].timestamp, expected_timestamp) + expected_url = u'http://netverslun.sci-mx.is/aminosyrur' + self.assertEquals(event_object.url, expected_url) + + expected_string = ( + u'Visited: {0:s} (Am\xedn\xf3s\xfdrur ) Visit Count: 1').format( + expected_url) + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/softwareupdate.py b/plaso/parsers/plist_plugins/softwareupdate.py new file mode 100644 index 0000000..fa25af9 --- /dev/null +++ b/plaso/parsers/plist_plugins/softwareupdate.py @@ -0,0 +1,83 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a default plist plugin in Plaso.""" + +from plaso.events import plist_event +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class SoftwareUpdatePlugin(interface.PlistPlugin): + """Basic plugin to extract the Mac OS X update status.""" + + NAME = 'plist_softwareupdate' + DESCRIPTION = u'Parser for Mac OS X software update plist files.' + + PLIST_PATH = 'com.apple.SoftwareUpdate.plist' + PLIST_KEYS = frozenset([ + 'LastFullSuccessfulDate', 'LastSuccessfulDate', + 'LastAttemptSystemVersion', 'LastUpdatesAvailable', + 'LastRecommendedUpdatesAvailable', 'RecommendedUpdates']) + + # Generated events: + # LastFullSuccessfulDate: timestamp when Mac OS X was full update. + # LastSuccessfulDate: timestamp when Mac OS X was partially update. + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extracts relevant Mac OS X update entries. 
+ + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. + """ + root = '/' + key = '' + version = match.get('LastAttemptSystemVersion', u'N/A') + pending = match['LastUpdatesAvailable'] + + description = u'Last Mac OS X {0:s} full update.'.format(version) + event_object = plist_event.PlistEvent( + root, key, match['LastFullSuccessfulDate'], description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if pending: + software = [] + for update in match['RecommendedUpdates']: + software.append(u'{0:s}({1:s})'.format( + update['Identifier'], update['Product Key'])) + description = ( + u'Last Mac OS {0!s} partially update, pending {1!s}: {2:s}.').format( + version, pending, u','.join(software)) + event_object = plist_event.PlistEvent( + root, key, match['LastSuccessfulDate'], description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(SoftwareUpdatePlugin) diff --git a/plaso/parsers/plist_plugins/softwareupdate_test.py b/plaso/parsers/plist_plugins/softwareupdate_test.py new file mode 100644 index 0000000..3b182d9 --- /dev/null +++ b/plaso/parsers/plist_plugins/softwareupdate_test.py @@ -0,0 +1,65 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Software Update plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import softwareupdate +from plaso.parsers.plist_plugins import test_lib + + +class SoftwareUpdatePluginTest(test_lib.PlistPluginTestCase): + """Tests for the SoftwareUpdate plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = softwareupdate.SoftwareUpdatePlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + plist_name = u'com.apple.SoftwareUpdate.plist' + test_file = self._GetTestFilePath([plist_name]) + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + event_object = event_objects[0] + self.assertEqual(event_object.key, u'') + self.assertEqual(event_object.root, u'/') + expected_desc = u'Last Mac OS X 10.9.1 (13B42) full update.' 
+ self.assertEqual(event_object.desc, expected_desc) + expected_string = u'// {}'.format(expected_desc) + self._TestGetMessageStrings( + event_object, expected_string, expected_string) + + event_object = event_objects[1] + self.assertEqual(event_object.key, u'') + self.assertEqual(event_object.root, u'/') + expected_desc = ( + u'Last Mac OS 10.9.1 (13B42) partially ' + u'update, pending 1: RAWCameraUpdate5.03(031-2664).') + self.assertEqual(event_object.desc, expected_desc) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/spotlight.py b/plaso/parsers/plist_plugins/spotlight.py new file mode 100644 index 0000000..4e511ec --- /dev/null +++ b/plaso/parsers/plist_plugins/spotlight.py @@ -0,0 +1,67 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Spotlight searched terms plugin in Plaso.""" + +from plaso.events import plist_event +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class SpotlightPlugin(interface.PlistPlugin): + """Basic plugin to extract Spotlight.""" + + NAME = 'plist_spotlight' + DESCRIPTION = u'Parser for Spotlight plist files.' + + PLIST_PATH = 'com.apple.spotlight.plist' + PLIST_KEYS = frozenset(['UserShortcuts']) + + # Generated events: + # name of the item: searched term. + # PATH: path of the program associated to the term. + # LAST_USED: last time when it was executed. + # DISPLAY_NAME: the display name of the program associated. + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extracts relevant Spotlight entries. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. 
+    """
+    for search_text, data in match['UserShortcuts'].iteritems():
+      description = (
+          u'Spotlight term searched "{0:s}" associated with {1:s} '
+          u'({2:s})').format(search_text, data['DISPLAY_NAME'], data['PATH'])
+      event_object = plist_event.PlistEvent(
+          u'/UserShortcuts', search_text, data['LAST_USED'], description)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+plist.PlistParser.RegisterPlugin(SpotlightPlugin)
diff --git a/plaso/parsers/plist_plugins/spotlight_test.py b/plaso/parsers/plist_plugins/spotlight_test.py
new file mode 100644
index 0000000..565d9cb
--- /dev/null
+++ b/plaso/parsers/plist_plugins/spotlight_test.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the spotlight plist plugin."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import plist as plist_formatter
+from plaso.parsers import plist
+from plaso.parsers.plist_plugins import spotlight
+from plaso.parsers.plist_plugins import test_lib
+
+
+class SpotlightPluginTest(test_lib.PlistPluginTestCase):
+  """Tests for the spotlight plist plugin."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._plugin = spotlight.SpotlightPlugin()
+    self._parser = plist.PlistParser()
+
+  def testProcess(self):
+    """Tests the Process function."""
+    test_file = self._GetTestFilePath(['com.apple.spotlight.plist'])
+    plist_name = 'com.apple.spotlight.plist'
+    event_queue_consumer = self._ParsePlistFileWithPlugin(
+        self._parser, self._plugin, test_file, plist_name)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEquals(len(event_objects), 9)
+
+    timestamps = []
+    for event_object in event_objects:
+      timestamps.append(event_object.timestamp)
+    expected_timestamps = frozenset([
+        1379937262090906, 1387822901900937, 1375236414408299, 1388331212005129,
+        1376696381196456, 1386951868185477, 1380942616952359, 1389056477460443,
+        1386111811136093])
+    self.assertTrue(set(timestamps) == expected_timestamps)
+
+    event_object = event_objects[1]
+    self.assertEqual(event_object.key, u'gr')
+    self.assertEqual(event_object.root, u'/UserShortcuts')
+    expected_desc = (
+        u'Spotlight term searched "gr" associated with '
+        u'Grab (/Applications/Utilities/Grab.app)')
+    self.assertEqual(event_object.desc, expected_desc)
+    expected_string = u'/UserShortcuts/gr {}'.format(expected_desc)
+    expected_short = expected_string[:77] + u'...'
+ self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/spotlight_volume.py b/plaso/parsers/plist_plugins/spotlight_volume.py new file mode 100644 index 0000000..9773d76 --- /dev/null +++ b/plaso/parsers/plist_plugins/spotlight_volume.py @@ -0,0 +1,60 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Spotlight Volume Configuration plist in Plaso.""" + +from plaso.events import plist_event +from plaso.parsers import plist +from plaso.parsers.plist_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class SpotlightVolumePlugin(interface.PlistPlugin): + """Basic plugin to extract the Spotlight Volume Configuration.""" + + NAME = 'plist_spotlight_volume' + DESCRIPTION = u'Parser for Spotlight volume configuration plist files.' + + PLIST_PATH = 'VolumeConfiguration.plist' + PLIST_KEYS = frozenset(['Stores']) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, match=None, + **unused_kwargs): + """Extracts relevant VolumeConfiguration Spotlight entries. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + match: Optional dictionary containing keys extracted from PLIST_KEYS. + The default is None. + """ + for volume_name, volume in match['Stores'].iteritems(): + description = u'Spotlight Volume {0:s} ({1:s}) activated.'.format( + volume_name, volume['PartialPath']) + event_object = plist_event.PlistEvent( + u'/Stores', '', volume['CreationDate'], description) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +plist.PlistParser.RegisterPlugin(SpotlightVolumePlugin) diff --git a/plaso/parsers/plist_plugins/spotlight_volume_test.py b/plaso/parsers/plist_plugins/spotlight_volume_test.py new file mode 100644 index 0000000..311db84 --- /dev/null +++ b/plaso/parsers/plist_plugins/spotlight_volume_test.py @@ -0,0 +1,67 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Spotlight Volume configuration plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import spotlight_volume +from plaso.parsers.plist_plugins import test_lib + + +class SpotlightVolumePluginTest(test_lib.PlistPluginTestCase): + """Tests for the Spotlight Volume configuration plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = spotlight_volume.SpotlightVolumePlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['VolumeConfiguration.plist']) + plist_name = 'VolumeConfiguration.plist' + event_queue_consumer = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + timestamps = [] + for event_object in event_objects: + timestamps.append(event_object.timestamp) + expected_timestamps = frozenset([ + 1372139683000000, 1369657656000000]) + self.assertTrue(set(timestamps) == expected_timestamps) + + event_object = event_objects[0] + self.assertEqual(event_object.key, u'') + self.assertEqual(event_object.root, u'/Stores') + expected_desc = (u'Spotlight Volume 4D4BFEB5-7FE6-4033-AAAA-' + u'AAAABBBBCCCCDDDD (/.MobileBackups) activated.') + self.assertEqual(event_object.desc, expected_desc) + expected_string = u'/Stores/ {}'.format(expected_desc) + expected_short = expected_string[:77] + u'...' + self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_plugins/test_lib.py b/plaso/parsers/plist_plugins/test_lib.py new file mode 100644 index 0000000..a3f7a39 --- /dev/null +++ b/plaso/parsers/plist_plugins/test_lib.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plist plugin related functions and classes for testing.""" + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.engine import single_process +from plaso.parsers import test_lib + + +class PlistPluginTestCase(test_lib.ParserTestCase): + """The unit test case for a plist plugin.""" + + def _ParsePlistFileWithPlugin( + self, parser_object, plugin_object, path, plist_name, + knowledge_base_values=None): + """Parses a file using the parser and plugin object. + + Args: + parser_object: the parser object. + plugin_object: the plugin object. + path: the path of the file to parse. 
+      plist_name: the name of the plist to parse.
+      knowledge_base_values: optional dict containing the knowledge base
+          values. The default is None.
+
+    Returns:
+      An event object queue consumer object (instance of
+      TestEventObjectQueueConsumer).
+    """
+    path_spec = path_spec_factory.Factory.NewPathSpec(
+        definitions.TYPE_INDICATOR_OS, location=path)
+    file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
+
+    file_object = file_entry.GetFileObject()
+    top_level_object = parser_object.GetTopLevel(file_object)
+    self.assertNotEquals(top_level_object, None)
+
+    return self._ParsePlistWithPlugin(
+        plugin_object, plist_name, top_level_object,
+        knowledge_base_values=knowledge_base_values)
+
+  def _ParsePlistWithPlugin(
+      self, plugin_object, plist_name, top_level_object,
+      knowledge_base_values=None):
+    """Parses a plist using the plugin object.
+
+    Args:
+      plugin_object: the plugin object.
+      plist_name: the name of the plist to parse.
+      top_level_object: the top-level plist object.
+      knowledge_base_values: optional dict containing the knowledge base
+          values. The default is None.
+
+    Returns:
+      An event object queue consumer object (instance of
+      TestEventObjectQueueConsumer).
+    """
+    event_queue = single_process.SingleProcessQueue()
+    event_queue_consumer = test_lib.TestEventObjectQueueConsumer(event_queue)
+
+    parse_error_queue = single_process.SingleProcessQueue()
+
+    parser_context = self._GetParserContext(
+        event_queue, parse_error_queue,
+        knowledge_base_values=knowledge_base_values)
+    plugin_object.Process(
+        parser_context, plist_name=plist_name, top_level=top_level_object)
+
+    return event_queue_consumer
diff --git a/plaso/parsers/plist_plugins/timemachine.py b/plaso/parsers/plist_plugins/timemachine.py
new file mode 100644
index 0000000..903f30f
--- /dev/null
+++ b/plaso/parsers/plist_plugins/timemachine.py
@@ -0,0 +1,86 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a TimeMachine plist plugin in Plaso."""
+
+import construct
+
+from plaso.events import plist_event
+from plaso.parsers import plist
+from plaso.parsers.plist_plugins import interface
+
+
+__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)'
+
+
+class TimeMachinePlugin(interface.PlistPlugin):
+  """Basic plugin to extract the TimeMachine hard disks and their backups."""
+
+  NAME = 'plist_timemachine'
+  DESCRIPTION = u'Parser for TimeMachine plist files.'
+
+  PLIST_PATH = 'com.apple.TimeMachine.plist'
+  PLIST_KEYS = frozenset(['Destinations', 'RootVolumeUUID'])
+
+  # Generated events:
+  # DestinationID: UUID of the remote hard disk where the backup is done.
+  # BackupAlias: structure that contains the extra information from the
+  #     destinationID.
+  # SnapshotDates: list of the backup dates.
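+  # For illustration, a hypothetical alias blob (not taken from the test
+  # data) and what TM_BACKUP_ALIAS below extracts from it. The volume name
+  # is stored as a Pascal string, a length byte followed by that many
+  # characters, after 10 bytes of padding:
+  #
+  #   data = '\x00' * 10 + '\x0a' + 'BackUpFast'
+  #   TM_BACKUP_ALIAS.parse(data).value == 'BackUpFast'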
+
+  TM_BACKUP_ALIAS = construct.Struct(
+      'tm_backup_alias',
+      construct.Padding(10),
+      construct.PascalString('value', length_field=construct.UBInt8('length')))
+
+  def GetEntries(
+      self, parser_context, file_entry=None, parser_chain=None, match=None,
+      **unused_kwargs):
+    """Extracts relevant TimeMachine entries.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+          The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+          point. The default is None.
+      match: Optional dictionary containing keys extracted from PLIST_KEYS.
+          The default is None.
+    """
+    root = '/Destinations'
+    key = 'item/SnapshotDates'
+
+    # For each TimeMachine device.
+    for destination in match['Destinations']:
+      hd_uuid = destination['DestinationID']
+      if not hd_uuid:
+        hd_uuid = u'Unknown device'
+      alias = destination['BackupAlias']
+      try:
+        alias = self.TM_BACKUP_ALIAS.parse(alias).value
+      except construct.FieldError:
+        alias = u'Unknown alias'
+      # For each backup.
+      for timestamp in destination['SnapshotDates']:
+        description = u'TimeMachine Backup in {0:s} ({1:s})'.format(
+            alias, hd_uuid)
+        event_object = plist_event.PlistEvent(root, key, timestamp, description)
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+plist.PlistParser.RegisterPlugin(TimeMachinePlugin)
diff --git a/plaso/parsers/plist_plugins/timemachine_test.py b/plaso/parsers/plist_plugins/timemachine_test.py
new file mode 100644
index 0000000..570fecb
--- /dev/null
+++ b/plaso/parsers/plist_plugins/timemachine_test.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the timemachine plist plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import plist as plist_formatter +from plaso.parsers import plist +from plaso.parsers.plist_plugins import timemachine +from plaso.parsers.plist_plugins import test_lib + + +class TimeMachinePluginTest(test_lib.PlistPluginTestCase): + """Tests for the timemachine plist plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = timemachine.TimeMachinePlugin() + self._parser = plist.PlistParser() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['com.apple.TimeMachine.plist']) + plist_name = 'com.apple.timemachine.plist' + event_object_generator = self._ParsePlistFileWithPlugin( + self._parser, self._plugin, test_file, plist_name) + event_objects = self._GetEventObjectsFromQueue(event_object_generator) + + self.assertEquals(len(event_objects), 13) + + timestamps = [] + for event_object in event_objects: + timestamps.append(event_object.timestamp) + expected_timestamps = frozenset([ + 1379165051000000, 1380098455000000, 1380810276000000, 1381883538000000, + 1382647890000000, 1383351739000000, 1384090020000000, 1385130914000000, + 1386265911000000, 1386689852000000, 1387723091000000, 1388840950000000, + 1388842718000000]) + self.assertTrue(set(timestamps) == expected_timestamps) + + event_object = event_objects[0] + self.assertEqual(event_object.root, u'/Destinations') + self.assertEqual(event_object.key, u'item/SnapshotDates') + expected_desc = ( + u'TimeMachine Backup in BackUpFast ' + u'(5B33C22B-A4A1-4024-A2F5-C9979C4AAAAA)') + self.assertEqual(event_object.desc, expected_desc) + expected_string = u'/Destinations/item/SnapshotDates {}'.format( + expected_desc) + expected_short = expected_string[:77] + u'...' + self._TestGetMessageStrings( + event_object, expected_string, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plist_test.py b/plaso/parsers/plist_test.py new file mode 100644 index 0000000..4613022 --- /dev/null +++ b/plaso/parsers/plist_test.py @@ -0,0 +1,74 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests the plist parser.""" + +import unittest + +from plaso.parsers import plist +# Register all plugins. 
+from plaso.parsers import plist_plugins  # pylint: disable=unused-import
+from plaso.parsers import test_lib
+
+
+class PlistParserTest(test_lib.ParserTestCase):
+  """Tests the plist parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = plist.PlistParser()
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath(['plist_binary'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEquals(len(event_objects), 12)
+
+    timestamps, roots, keys = zip(
+        *[(evt.timestamp, evt.root, evt.key) for evt in event_objects])
+
+    expected_timestamps = frozenset([
+        1345251192528750, 1351827808261762, 1345251268370453,
+        1351818803000000, 1351819298997672, 1351818797324095,
+        1301012201414766, 1302199013524275, 1341957900020116,
+        1350666391557044, 1350666385239661, 1341957896010535])
+
+    self.assertTrue(set(expected_timestamps) == set(timestamps))
+    self.assertEquals(12, len(set(timestamps)))
+
+    expected_roots = frozenset([
+        '/DeviceCache/00-0d-fd-00-00-00',
+        '/DeviceCache/44-00-00-00-00-00',
+        '/DeviceCache/44-00-00-00-00-01',
+        '/DeviceCache/44-00-00-00-00-02',
+        '/DeviceCache/44-00-00-00-00-03',
+        '/DeviceCache/44-00-00-00-00-04'])
+    self.assertTrue(expected_roots == set(roots))
+    self.assertEquals(6, len(set(roots)))
+
+    expected_keys = frozenset([
+        u'LastInquiryUpdate',
+        u'LastServicesUpdate',
+        u'LastNameUpdate'])
+    self.assertTrue(expected_keys == set(keys))
+    self.assertEquals(3, len(set(keys)))
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/pls_recall.py b/plaso/parsers/pls_recall.py
new file mode 100644
index 0000000..d31cce3
--- /dev/null
+++ b/plaso/parsers/pls_recall.py
@@ -0,0 +1,173 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for PL-SQL Developer Recall files."""
+
+import construct
+import os
+
+from plaso.lib import errors
+from plaso.lib import event
+from plaso.lib import timelib
+from plaso.lib import utils
+from plaso.parsers import interface
+from plaso.parsers import manager
+
+
+class PlsRecallEvent(event.EventObject):
+  """Convenience class for a PL-SQL Recall file container."""
+
+  DATA_TYPE = 'PLSRecall:event'
+
+  def __init__(self, timestamp, sequence, user, database, query):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp when the entry was created.
+      sequence: The sequence number that indicates the order of execution.
+      user: The username that made the query.
+      database: String containing the database name.
+      query: String containing the PL-SQL query.
+ """ + super(PlsRecallEvent, self).__init__() + self.timestamp = timestamp + self.sequence = sequence + self.username = user + self.database_name = database + self.query = query + + +class PlsRecallParser(interface.BaseParser): + """Parse PL-SQL Recall files. + + Parser is based on a: + TRecallRecord = packed record + Sequence: Integer; + TimeStamp: TDateTime; + Username: array[0..30] of Char; + Database: array[0..80] of Char; + Text: array[0..4000] of Char; + end; + + Delphi TDateTime is a little endian 64-bit + floating point without any time zone information + """ + + NAME = 'pls_recall' + DESCRIPTION = u'Parser for PL-SQL Recall files.' + + PLS_STRUCT = construct.Struct( + 'PL-SQL_Recall', + construct.ULInt32('Sequence'), + construct.LFloat64('TimeStamp'), + construct.String('Username', 31, None, '\x00'), + construct.String('Database', 81, None, '\x00'), + construct.String('Query', 4001, None, '\x00')) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract entries from a PLSRecall.dat file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + + try: + is_pls = self.VerifyFile(file_object) + except (IOError, construct.FieldError) as exception: + file_object.close() + raise errors.UnableToParseFile(( + u'Not a PLSrecall File, unable to parse.' + u'with error: {0:s}').format(exception)) + + if not is_pls: + file_object.close() + raise errors.UnableToParseFile( + u'Not a PLSRecall File, unable to parse.') + + file_object.seek(0, os.SEEK_SET) + pls_record = self.PLS_STRUCT.parse_stream(file_object) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + while pls_record: + event_object = PlsRecallEvent( + timelib.Timestamp.FromDelphiTime(pls_record.TimeStamp), + pls_record.Sequence, pls_record.Username, + pls_record.Database, pls_record.Query) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + try: + pls_record = self.PLS_STRUCT.parse_stream(file_object) + except construct.FieldError as exception: + # The code has reached the end of file (EOF). + break + + file_object.close() + + def VerifyFile(self, file_object): + """Check if the file is a PLSRecall.dat file. + + Args: + file_object: file that we want to check. + + Returns: + True if this is a valid PLSRecall.dat file, otherwise False. + """ + file_object.seek(0, os.SEEK_SET) + + # The file consists of PL-SQL structures that are equal + # size (4125 bytes) TRecallRecord records. It should be + # noted that the query value is free form. + try: + structure = self.PLS_STRUCT.parse_stream(file_object) + except (IOError, construct.FieldError): + return False + + # Verify few entries inside the structure. + try: + timestamp = timelib.Timestamp.FromDelphiTime(structure.TimeStamp) + except ValueError: + return False + + if timestamp <= 0: + return False + + # TODO: Add other verification checks here. For instance make sure + # that the query actually looks like a SQL query. This structure produces a + # lot of false positives and thus we need to add additional verification to + # make sure we are not parsing non-PLSRecall files. 
+ # Another check might be to make sure the username looks legitimate, or the + # sequence number, or the database name. + # For now we just check if all three fields pass our "is this a text" test. + if not utils.IsText(structure.Username): + return False + if not utils.IsText(structure.Query): + return False + if not utils.IsText(structure.Database): + return False + + return True + + +manager.ParsersManager.RegisterParser(PlsRecallParser) diff --git a/plaso/parsers/pls_recall_test.py b/plaso/parsers/pls_recall_test.py new file mode 100644 index 0000000..3fc56da --- /dev/null +++ b/plaso/parsers/pls_recall_test.py @@ -0,0 +1,78 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for PL-SQL recall file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import pls_recall as pls_recall_formatter +from plaso.lib import timelib_test +from plaso.parsers import pls_recall +from plaso.parsers import test_lib + + +class PlsRecallTest(test_lib.ParserTestCase): + """Tests for PL-SQL recall file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = pls_recall.PlsRecallParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['PLSRecall_Test.dat']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # There are two events in test file. + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + timestamp_expected = timelib_test.CopyStringToTimestamp( + '2013-06-18 19:50:00:00:00') + self.assertEqual(event_object.timestamp, timestamp_expected) + + sequence_expected = 206 + self.assertEqual(event_object.sequence, sequence_expected) + + username_expected = u'tsltmp' + self.assertEqual(event_object.username, username_expected) + + database_name_expected = u'DB11' + self.assertEqual(event_object.database_name, database_name_expected) + + # The test file actually has 'test_databae' in the SQL string. + query_expected = u'SELECT * from test_databae where date > \'01/01/2012\'' + self.assertEqual(event_object.query, query_expected) + + expected_msg = ( + u'Sequence #206 ' + u'User: tsltmp ' + u'Database Name: DB11 ' + u'Query: SELECT * from test_databae where date > \'01/01/2012\'') + + expected_msg_short = ( + u'206 tsltmp DB11 ' + u'SELECT * from test_databae where date > \'01/01/2012\'') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/plugins.py b/plaso/parsers/plugins.py new file mode 100644 index 0000000..c7c3a5d --- /dev/null +++ b/plaso/parsers/plugins.py @@ -0,0 +1,118 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. 
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the basic interface for plugins within Plaso.
+
+This library serves as the basis for all plugins in Plaso, whether they are
+Windows Registry plugins, SQLite plugins or any other parsing plugins.
+
+This is provided as a separate file to make it easier to inherit in other
+projects that may want to use the Plaso plugin system.
+"""
+
+
+class BasePlugin(object):
+  """A plugin is a lightweight parser that makes use of a common data structure.
+
+  When a data structure is common amongst several artifacts or files, a plugin
+  infrastructure can be written to make writing parsers simpler. The goal of a
+  plugin is to have only a single parser that understands the data structure
+  and that can call plugins with specialized knowledge of certain structures.
+
+  An example of this is a SQLite database. A plugin can be written that has
+  knowledge of a certain database, such as the Chrome history or Skype history
+  database. This can be done without needing to write a full-fledged parser
+  that would have to re-implement the data structure knowledge. A single
+  parser can be created that calls the plugins to see if one of them knows
+  that particular database.
+
+  Another example is the Windows Registry: a single parser that can parse the
+  Registry can be made, and the job of each plugin is to parse a particular
+  Registry key. The parser can then read a Registry key and compare it against
+  the list of available plugins to see if it can be parsed.
+  """
+
+  # The name of the plugin. This is the name that is used in the registration
+  # and used for parser/plugin selection, so this needs to be concise and
+  # unique for all plugins/parsers, e.g. 'Chrome', 'Safari', 'UserAssist', etc.
+  NAME = 'base_plugin'
+
+  DESCRIPTION = u''
+
+  # URLS should contain a list of URLs with additional information about the
+  # plugin, for instance some additional reading material. This can be a
+  # description of the data structure, or how to read the data that comes out
+  # of the parser, etc. So in essence this is a field to define pointers to
+  # additional resources to assist the practitioner reading the output of
+  # the plugin.
+  URLS = []
+
+  # TODO: remove.
+  @property
+  def plugin_name(self):
+    """Return the name of the plugin."""
+    return self.NAME
+
+  def _BuildParserChain(self, parser_chain=None):
+    """Return the parser chain with the addition of the current parser.
+
+    Args:
+      parser_chain: Optional string containing the parsing chain up to this
+          point. The default is None.
+
+    Returns:
+      The parser chain, with the addition of the current parser.
+    """
+    if not parser_chain:
+      return self.NAME
+
+    return u'/'.join([parser_chain, self.NAME])
+
+  def Process(self, unused_parser_context, unused_parser_chain=None, **kwargs):
+    """Evaluates if this is the correct plugin and processes data accordingly.
+
+    The purpose of the process function is to evaluate if this particular
+    plugin is the correct one for the particular data structure at hand.
+    This function accepts one value to use for evaluation, which could be
+    a registry key, a list of table names for a database or any other
+    criteria that can be used to evaluate if the plugin should be run or not.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      parser_chain: Optional string containing the parsing chain up to this
+          point. The default is None.
+      kwargs: Depending on the plugin they may require different sets of
+          arguments to be able to evaluate whether or not this is
+          the correct plugin.
+
+    Raises:
+      ValueError: When there are unused keyword arguments.
+    """
+    if kwargs:
+      raise ValueError(u'Unused keyword arguments: {0:s}.'.format(
+          kwargs.keys()))
+
+
+class BasePluginCache(object):
+  """A generic cache object for plugins.
+
+  This cache object can be used to store various information that needs
+  to be cached to speed up code execution.
+  """
+
+  def GetResults(self, attribute):
+    """Return a cached attribute if it exists."""
+    return getattr(self, attribute, None)
diff --git a/plaso/parsers/popcontest.py b/plaso/parsers/popcontest.py
new file mode 100644
index 0000000..673ae6c
--- /dev/null
+++ b/plaso/parsers/popcontest.py
@@ -0,0 +1,275 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the Popularity Contest log file parser in Plaso.
+
+   Information updated 20 January 2014.
+   From Debian Package Popularity Contest
+   Avery Pennarun
+
+   From 'http://www.unix.com/man-page/Linux/8/popularity-contest/':
+   '
+      The popularity-contest command gathers information about Debian pack-
+      ages installed on the system, and prints the name of the most recently
+      used executable program in that package as well as its last-accessed
+      time (atime) and last-attribute-changed time (ctime) to stdout.
+
+      When aggregated with the output of popularity-contest from many other
+      systems, this information is valuable because it can be used to deter-
+      mine which Debian packages are commonly installed, used, or installed
+      and never used. This helps Debian maintainers make decisions such as
+      which packages should be installed by default on new systems.
+
+      The resulting statistic is available from the project home page
+      http://popcon.debian.org/.
+
+      Normally, popularity-contest is run from a cron(8) job,
+      /etc/cron.daily/popularity-contest, which automatically submits the
+      results to Debian package maintainers (only once a week) according to
+      the settings in /etc/popularity-contest.conf and /usr/share/popularity-
+      contest/default.conf.
+   '
+
+   From 'http://popcon.ubuntu.com/README':
+   '
+   The popularity-contest output looks like this:
+
+     POPULARITY-CONTEST-0 TIME:914183330 ID:b92a5fc1809d8a95a12eb3a3c8445
+     914183333 909868335 grep /bin/fgrep
+     914183333 909868280 findutils /usr/bin/find
+     914183330 909885698 dpkg-awk /usr/bin/dpkg-awk
+     914183330 909868577 gawk /usr/bin/gawk
+     [...more lines...]
+     END-POPULARITY-CONTEST-0 TIME:914183335
+
+   The first and last lines allow you to put more than one set of
+   popularity-contest results into a single file and then split them up
+   easily later.
+
+   The rest of the lines are package entries, one line for each package
+   installed on your system. They have the format:
+
+     <atime> <ctime> <package-name> <mru-program> <tag>
+
+   <package-name> is the name of the Debian package that contains
+   <mru-program>. <mru-program> is the most recently used program,
+   static library, or header (.h) file in the package.
+
+   <atime> and <ctime> are the access time and creation time of the
+   <mru-program> on your disk, respectively, represented as the number of
+   seconds since midnight GMT on January 1, 1970 (i.e. in Unix time_t format).
+   Linux updates <atime> whenever you open the file; <ctime> was set when you
+   first installed the package.
+
+   <tag> is determined by popularity-contest depending on <atime>, <ctime>, and
+   the current date. <tag> can be RECENT-CTIME, OLD, or NOFILES.
+
+   RECENT-CTIME means that atime is very close to ctime; it's impossible to
+   tell whether the package was used recently or not, since <atime> is also
+   updated when <ctime> is set. Normally, this happens because you have
+   recently upgraded the package to a new version, resetting the <atime>.
+
+   OLD means that the <atime> is more than a month ago; you haven't used the
+   package for more than a month.
+
+   NOFILES means that no files in the package seemed to be programs, so
+   <atime>, <ctime>, and <mru-program> are invalid.'
+
+   REMARKS. The parser will generate events solely based on the <atime> field
+   and not using <ctime>, to reduce the generation of (possibly many) useless
+   events all with the same <ctime>. Indeed, that <ctime> will probably be
+   obtained from file system and/or package management logs. The <ctime> will
+   be reported in the log line.
+"""
+
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class PopularityContestSessionEvent(time_events.PosixTimeEvent):
+  """Convenience class for a Popularity Contest start/end event."""
+
+  DATA_TYPE = 'popularity_contest:session:event'
+
+  def __init__(self, timestamp, session, status, hostid=None, details=None):
+    """Initializes the event object.
+
+    Args:
+      timestamp: microseconds since epoch in UTC, it's the start/end time.
+      session: the session number.
+      status: start or end of the session.
+      hostid: the host UUID.
+      details: the popularity contest version and host architecture.
+    """
+    super(PopularityContestSessionEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.ADDED_TIME)
+    self.session = session
+    self.status = status
+    self.hostid = hostid
+    self.details = details
+
+
+class PopularityContestEvent(time_events.PosixTimeEvent):
+  """Convenience class for a Popularity Contest line event."""
+
+  DATA_TYPE = 'popularity_contest:log:event'
+
+  def __init__(self, timestamp, ctime, package, mru, tag=None):
+    """Initializes the event object.
+
+    Args:
+      timestamp: microseconds since epoch in UTC, it's the <atime>.
+      ctime: seconds since epoch in UTC, it's the <ctime>.
+      package: the installed package name, which the mru belongs to.
+      mru: the recently used app/library from the package.
+ tag: the popularity context tag. + """ + super(PopularityContestEvent, self).__init__( + timestamp, eventdata.EventTimestamp.ACCESS_TIME) + # TODO: adding ctime as is, reconsider a conversion to human readable form. + self.ctime = ctime + self.package = package + self.mru = mru + self.record_tag = tag + + +class PopularityContestParser(text_parser.PyparsingSingleLineTextParser): + """Parse popularity contest log files.""" + + NAME = 'popularity_contest' + DESCRIPTION = u'Parser for popularity contest log files.' + + EPOCH = text_parser.PyparsingConstants.INTEGER.setResultsName('epoch') + PACKAGE = pyparsing.Word(pyparsing.printables).setResultsName('package') + MRU = pyparsing.Word(pyparsing.printables).setResultsName('mru') + TAG = pyparsing.QuotedString('<', endQuoteChar='>').setResultsName('tag') + + HEADER = ( + pyparsing.Literal(u'POPULARITY-CONTEST-').suppress() + + text_parser.PyparsingConstants.INTEGER.setResultsName('session') + + pyparsing.Literal(u'TIME:').suppress() + EPOCH + + pyparsing.Literal('ID:').suppress() + + pyparsing.Word(pyparsing.alphanums, exact=32).setResultsName('id') + + pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('details')) + + FOOTER = ( + pyparsing.Literal(u'END-POPULARITY-CONTEST-').suppress() + + text_parser.PyparsingConstants.INTEGER.setResultsName('session') + + pyparsing.Literal(u'TIME:').suppress() + EPOCH) + + LOG_LINE = ( + EPOCH.setResultsName('atime') + EPOCH.setResultsName('ctime') + + (PACKAGE + TAG | PACKAGE + MRU + pyparsing.Optional(TAG))) + + LINE_STRUCTURES = [ + ('logline', LOG_LINE), + ('header', HEADER), + ('footer', FOOTER), + ] + + def VerifyStructure(self, parser_context, line): + """Verify that this file is a Popularity Contest log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + line: A single line from the text file. + + Returns: + True if this is the correct parser, False otherwise. + """ + try: + header_struct = self.HEADER.parseString(line) + except pyparsing.ParseException: + logging.debug(u'Not a Popularity Contest log file, invalid header') + return False + if not timelib.Timestamp.FromPosixTime(header_struct.epoch): + logging.debug(u'Invalid Popularity Contest log file header timestamp.') + return False + return True + + def ParseRecord(self, parser_context, key, structure): + """Parse each record structure and return an EventObject if applicable. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: An identification string indicating the name of the parsed + structure. + structure: A pyparsing.ParseResults object from a line in the + log file. + + Returns: + An event object (instance of EventObject) or None. + """ + # TODO: Add anomaly objects for abnormal timestamps, such as when the log + # timestamp is greater than the session start. 
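+    # For reference, sample lines from the README quoted in the module
+    # docstring. A header line such as:
+    #   POPULARITY-CONTEST-0 TIME:914183330 ID:b92a5fc1809d8a95a12eb3a3c8445
+    # parses to session=0 and epoch=914183330 and becomes a session 'start'
+    # event, while a log line such as:
+    #   914183333 909868335 grep /bin/fgrep
+    # parses to atime=914183333, ctime=909868335, package='grep' and
+    # mru='/bin/fgrep', with no optional tag present.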
+    if key == 'logline':
+      return self._ParseLogLine(structure)
+    elif key == 'header':
+      if not structure.epoch:
+        logging.debug(u'PopularityContestParser, header with invalid epoch.')
+        return
+      return PopularityContestSessionEvent(
+          structure.epoch, unicode(structure.session), u'start', structure.id,
+          structure.details)
+    elif key == 'footer':
+      if not structure.epoch:
+        logging.debug(u'PopularityContestParser, footer with invalid epoch.')
+        return
+      return PopularityContestSessionEvent(
+          structure.epoch, unicode(structure.session), u'end')
+    else:
+      logging.warning(
+          u'PopularityContestParser, unknown structure: {}.'.format(key))
+
+  def _ParseLogLine(self, structure):
+    """Gets an event_object or None from the pyparsing ParseResults.
+
+    Args:
+      structure: the pyparsing ParseResults object.
+
+    Returns:
+      event_object: a plaso event or None.
+    """
+    # Required fields are <mru> and <atime> and we are not interested in
+    # log lines without <mru>.
+    if not structure.mru:
+      return
+    # The <atime> field (as <ctime>) is always present but could be 0.
+    # In case of <atime> equal to 0, we are in the NOFILES case, so safely
+    # return without logging.
+    if not structure.atime:
+      return
+    # TODO: not doing any check on <tag> fields, even if only informative;
+    # probably it could be better to check for the expected values.
+    # TODO: ctime is a numeric string representing seconds since epoch UTC,
+    # reconsider a conversion to integer together with microseconds usage.
+    return PopularityContestEvent(
+        structure.atime, structure.ctime, structure.package, structure.mru,
+        structure.tag)
+
+
+manager.ParsersManager.RegisterParser(PopularityContestParser)
diff --git a/plaso/parsers/popcontest_test.py b/plaso/parsers/popcontest_test.py
new file mode 100644
index 0000000..c979fdf
--- /dev/null
+++ b/plaso/parsers/popcontest_test.py
@@ -0,0 +1,145 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Popularity Contest (popcontest) parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import popcontest as popcontest_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import popcontest +from plaso.parsers import test_lib + + +__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)' + + +class PopularityContestUnitTest(test_lib.ParserTestCase): + """Tests for the popcontest parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = popcontest.PopularityContestParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['popcontest1.log']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 13) + + self.assertEquals( + event_objects[0].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 05:41:41.000')) + expected_string = ( + u'Session 0 start ' + u'ID 12345678901234567890123456789012 [ARCH:i386 POPCONVER:1.38]') + expected_short_string = u'Session 0 start' + self._TestGetMessageStrings( + event_objects[0], expected_string, expected_short_string) + + self.assertEquals( + event_objects[1].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 07:34:42.000')) + expected_string = u'mru [/usr/sbin/atd] package [at]' + expected_short_string = u'/usr/sbin/atd' + self._TestGetMessageStrings( + event_objects[1], expected_string, expected_short_string) + + self.assertEquals( + event_objects[2].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 07:34:43.000')) + expected_string = ( + u'mru [/usr/lib/python2.5/lib-dynload/_struct.so] ' + u'package [python2.5-minimal]') + expected_short_string = u'/usr/lib/python2.5/lib-dynload/_struct.so' + self._TestGetMessageStrings( + event_objects[2], expected_string, expected_short_string) + + self.assertEquals( + event_objects[3].timestamp, + timelib_test.CopyStringToTimestamp('2010-05-30 05:26:20.000')) + expected_string = ( + u'mru [/usr/bin/empathy] package [empathy] tag [RECENT-CTIME]') + expected_short_string = u'/usr/bin/empathy' + self._TestGetMessageStrings( + event_objects[3], expected_string, expected_short_string) + + self.assertEquals( + event_objects[6].timestamp, + timelib_test.CopyStringToTimestamp('2010-05-12 07:58:33.000')) + expected_string = u'mru [/usr/bin/orca] package [gnome-orca] tag [OLD]' + expected_short_string = u'/usr/bin/orca' + self._TestGetMessageStrings( + event_objects[6], expected_string, expected_short_string) + + self.assertEquals( + event_objects[7].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 05:41:41.000')) + expected_string = u'Session 0 end' + expected_short_string = expected_string + self._TestGetMessageStrings( + event_objects[7], expected_string, expected_short_string) + + self.assertEquals( + event_objects[8].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 05:41:41.000')) + expected_string = ( + u'Session 1 start ' + u'ID 12345678901234567890123456789012 [ARCH:i386 POPCONVER:1.38]') + expected_short_string = u'Session 1 start' + self._TestGetMessageStrings( + event_objects[8], expected_string, expected_short_string) + + self.assertEquals( + event_objects[9].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 07:34:42.000')) + expected_string = u'mru [/super/cool/plasuz] package [plaso]' + expected_short_string = u'/super/cool/plasuz' + 
self._TestGetMessageStrings( + event_objects[9], expected_string, expected_short_string) + + self.assertEquals( + event_objects[10].timestamp, + timelib_test.CopyStringToTimestamp('2010-04-06 12:25:42.000')) + expected_string = u'mru [/super/cool/plasuz] package [miss_ctime]' + expected_short_string = u'/super/cool/plasuz' + self._TestGetMessageStrings( + event_objects[10], expected_string, expected_short_string) + + self.assertEquals( + event_objects[11].timestamp, + timelib_test.CopyStringToTimestamp('2010-05-12 07:58:33.000')) + expected_string = u'mru [/super/cool] package [plaso] tag [WRONG_TAG]' + expected_short_string = u'/super/cool' + self._TestGetMessageStrings( + event_objects[11], expected_string, expected_short_string) + + self.assertEquals( + event_objects[12].timestamp, + timelib_test.CopyStringToTimestamp('2010-06-22 05:41:41.000')) + expected_string = u'Session 1 end' + expected_short_string = expected_string + self._TestGetMessageStrings( + event_objects[12], expected_string, expected_short_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/recycler.py b/plaso/parsers/recycler.py new file mode 100644 index 0000000..a1f0976 --- /dev/null +++ b/plaso/parsers/recycler.py @@ -0,0 +1,222 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Windows Recycle files, INFO2 and $I/$R pairs.""" + +import logging + +import construct + +from plaso.events import time_events +from plaso.lib import binary +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.lib import utils +from plaso.parsers import interface +from plaso.parsers import manager + + +class WinRecycleEvent(time_events.FiletimeEvent): + """Convenience class for a Windows Recycle bin EventObject.""" + + DATA_TYPE = 'windows:metadata:deleted_item' + + def __init__( + self, filename_ascii, filename_utf, record_information, record_size): + """Initializes the event object.""" + timestamp = record_information.get('filetime', 0) + + super(WinRecycleEvent, self).__init__( + timestamp, eventdata.EventTimestamp.DELETED_TIME) + + if 'index' in record_information: + self.index = record_information.get('index', 0) + self.offset = record_size * self.index + else: + self.offset = 0 + + self.drive_number = record_information.get('drive', None) + self.file_size = record_information.get('filesize', 0) + + if filename_utf: + self.orig_filename = filename_utf + else: + self.orig_filename = filename_ascii + + # The unicode cast is done on the ASCII string to make + # comparison work better (sometimes a warning that a comparison + # could not be made due to the objects being of different type). 
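+    # (In Python 2, comparing str to unicode implicitly decodes the str as
+    # ASCII; when that decode fails the values compare unequal and a
+    # UnicodeWarning is emitted, hence the explicit cast below.)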
+    if filename_ascii and unicode(filename_ascii) != filename_utf:
+      self.orig_filename_legacy = filename_ascii
+
+
+class WinRecycleBinParser(interface.BaseParser):
+  """Parses the Windows $Recycle.Bin $I files."""
+
+  NAME = 'recycle_bin'
+  DESCRIPTION = u'Parser for Windows $Recycle.Bin $I files.'
+
+  # Define a list of all structs needed.
+  # Struct read from:
+  # https://code.google.com/p/rifiuti2/source/browse/trunk/src/rifiuti-vista.h
+  RECORD_STRUCT = construct.Struct(
+      'record',
+      construct.ULInt64('filesize'),
+      construct.ULInt64('filetime'))
+
+  MAGIC_STRUCT = construct.ULInt64('magic')
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract entries from a Windows RecycleBin $Ixx file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+          point. The default is None.
+    """
+    file_object = file_entry.GetFileObject()
+    try:
+      magic_header = self.MAGIC_STRUCT.parse_stream(file_object)
+    except (construct.FieldError, IOError) as exception:
+      raise errors.UnableToParseFile(
+          u'Unable to parse $Ixxx file with error: {0:s}'.format(exception))
+
+    if magic_header != 1:
+      raise errors.UnableToParseFile(
+          u'Not an $Ixxx file, wrong magic header.')
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    # We may have to rely on filenames since this header is very generic.
+    # TODO: Rethink this and potentially make a better test.
+    base_filename = utils.GetBaseName(file_entry.name)
+    if not base_filename.startswith('$I'):
+      raise errors.UnableToParseFile(
+          u'Not an $Ixxx file, filename doesn\'t start with $I.')
+
+    record = self.RECORD_STRUCT.parse_stream(file_object)
+    filename_utf = binary.ReadUtf16Stream(file_object)
+
+    file_object.close()
+    event_object = WinRecycleEvent(u'', filename_utf, record, 0)
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+class WinRecycleInfo2Parser(interface.BaseParser):
+  """Parses the Windows Recycler INFO2 file."""
+
+  NAME = 'recycle_bin_info2'
+  DESCRIPTION = u'Parser for Windows Recycler INFO2 files.'
+
+  # Define a list of all structs used.
+  INT32_LE = construct.ULInt32('my_int')
+
+  FILE_HEADER_STRUCT = construct.Struct(
+      'file_header',
+      construct.Padding(8),
+      construct.ULInt32('record_size'))
+
+  # Struct based on (both unicode and legacy strings):
+  # https://code.google.com/p/rifiuti2/source/browse/trunk/src/rifiuti.h
+  RECORD_STRUCT = construct.Struct(
+      'record',
+      construct.ULInt32('index'),
+      construct.ULInt32('drive'),
+      construct.ULInt64('filetime'),
+      construct.ULInt32('filesize'))
+
+  STRING_STRUCT = construct.CString('legacy_filename')
+
+  # Define a list of needed variables.
+  UNICODE_FILENAME_OFFSET = 0x11C
+  RECORD_INDEX_OFFSET = 0x108
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract entries from Windows Recycler INFO2 file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+          point. The default is None.
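+
+    Raises:
+      UnableToParseFile: when the file cannot be verified as an INFO2 file.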
+ """ + file_object = file_entry.GetFileObject() + try: + magic_header = self.INT32_LE.parse_stream(file_object) + except (construct.FieldError, IOError) as exception: + raise errors.UnableToParseFile( + u'Unable to parse INFO2 file with error: {0:s}'.format(exception)) + + if magic_header is not 5: + raise errors.UnableToParseFile( + u'Not an INFO2 file, wrong magic header.') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + # Since this header value is really generic it is hard not to use filename + # as an indicator too. + # TODO: Rethink this and potentially make a better test. + base_filename = utils.GetBaseName(file_entry.name) + if not base_filename.startswith('INFO2'): + raise errors.UnableToParseFile( + u'Not an INFO2 file, filename isn\'t INFO2.') + + file_header = self.FILE_HEADER_STRUCT.parse_stream(file_object) + + # Limit recrodsize to 65536 to be on the safe side. + record_size = file_header['record_size'] + if record_size > 65536: + logging.error(( + u'Record size: {0:d} is too large for INFO2 reducing to: ' + u'65535').format(record_size)) + record_size = 65535 + + # If recordsize is 0x320 then we have UTF/unicode names as well. + read_unicode_names = False + if record_size is 0x320: + read_unicode_names = True + + data = file_object.read(record_size) + while data: + if len(data) != record_size: + break + filename_ascii = self.STRING_STRUCT.parse(data[4:]) + record_information = self.RECORD_STRUCT.parse( + data[self.RECORD_INDEX_OFFSET:]) + if read_unicode_names: + filename_utf = binary.ReadUtf16( + data[self.UNICODE_FILENAME_OFFSET:]) + else: + filename_utf = u'' + + event_object = WinRecycleEvent( + filename_ascii, filename_utf, record_information, record_size) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + data = file_object.read(record_size) + + file_object.close() + + +manager.ParsersManager.RegisterParser(WinRecycleBinParser) diff --git a/plaso/parsers/recycler_test.py b/plaso/parsers/recycler_test.py new file mode 100644 index 0000000..aba7358 --- /dev/null +++ b/plaso/parsers/recycler_test.py @@ -0,0 +1,111 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Windows recycler parsers.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import recycler as recycler_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import recycler +from plaso.parsers import test_lib + + +class WinRecycleBinParserTest(test_lib.ParserTestCase): + """Tests for the Windows Recycle Bin parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = recycler.WinRecycleBinParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['$II3DF3L.zip']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + self.assertEquals(event_object.orig_filename, ( + u'C:\\Users\\nfury\\Documents\\Alloy Research\\StarFury.zip')) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-12 20:49:58.633') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.file_size, 724919) + + expected_msg = ( + u'C:\\Users\\nfury\\Documents\\Alloy Research\\StarFury.zip ' + u'(from drive C?)') + expected_msg_short = ( + u'Deleted file: C:\\Users\\nfury\\Documents\\Alloy Research\\' + u'StarFury.zip') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class WinRecyclerInfo2ParserTest(test_lib.ParserTestCase): + """Tests for the Windows Recycler INFO2 parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = recycler.WinRecycleInfo2Parser() + + def testParse(self): + """Reads an INFO2 file and run a few tests.""" + test_file = self._GetTestFilePath(['INFO2']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 4) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2004-08-25 16:18:25.237') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals(event_object.timestamp_desc, + eventdata.EventTimestamp.DELETED_TIME) + + self.assertEquals(event_object.index, 1) + self.assertEquals(event_object.orig_filename, ( + u'C:\\Documents and Settings\\Mr. Evil\\Desktop\\lalsetup250.exe')) + + event_object = event_objects[1] + + expected_msg = ( + u'DC2 -> C:\\Documents and Settings\\Mr. Evil\\Desktop' + u'\\netstumblerinstaller_0_4_0.exe [C:\\Documents and ' + u'Settings\\Mr. Evil\\Desktop\\netstumblerinstaller_0_4_0.exe] ' + u'(from drive C)') + expected_msg_short = ( + u'Deleted file: C:\\Documents and Settings\\Mr. Evil\\Desktop' + u'\\netstumblerinstaller...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[2] + + self._TestGetSourceStrings(event_object, u'Recycle Bin', u'RECBIN') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/rubanetra.py b/plaso/parsers/rubanetra.py new file mode 100755 index 0000000..ac002a5 --- /dev/null +++ b/plaso/parsers/rubanetra.py @@ -0,0 +1,754 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+
+from plaso.events.time_events import TimestampEvent
+from plaso.lib import errors
+from plaso.lib import event
+from plaso.lib import timelib
+from plaso.lib.eventdata import EventTimestamp
+from plaso.parsers import interface
+from plaso.parsers import manager
+
+try:
+    import xml.etree.cElementTree as ElementTree
+except ImportError:
+    import xml.etree.ElementTree as ElementTree
+
+__author__ = 'Stefan Swerk (stefan_rubanetra@swerk.priv.at)'
+
+
+class RubanetraXmlParser(interface.BaseParser):
+    """This class represents the Python parser component of the Rubanetra
+    project. Currently, it is only capable of parsing files adhering to the
+    XML standard and depends on the 'xml.etree' library.
+    """
+    NAME = 'rubanetra_xml_parser'
+    DESCRIPTION = u'Rubanetra XML file parser'
+    VERSION = u'0.0.6'
+
+    RUBANETRA_METADATA_FIELDS = frozenset(
+        ['implementationVersion', 'implementationTitle', 'implementationVendor'])
+
+    def Parse(self, parser_context, file_entry, parser_chain=None):
+        """Parses an XML file containing Rubanetra produced content.
+
+        :param parser_context: A parser context object (instance of ParserContext).
+        :param file_entry: A file entry object (instance of dfvfs.FileEntry).
+        :param parser_chain: Optional string containing the parsing chain up to this
+            point. The default is None.
+        """
+        rubanetra_metadata_fields = set(self.RUBANETRA_METADATA_FIELDS)
+        rubanetra_metadata_dict = dict()
+
+        file_handle = None
+        try:
+            if file_entry is not None and file_entry.IsFile():
+                # Open the file read-only.
+                file_handle = file_entry.GetFileObject()
+            else:
+                raise errors.UnableToParseFile(u'Not a valid Rubanetra file.')
+
+            file_size = file_handle.get_size()
+
+            if file_size <= 0:
+                raise errors.UnableToParseFile(
+                    u'File size: {0:d} bytes is less than or equal to 0.'.format(file_size))
+
+            # Read from the beginning and check whether this is an XML file.
+            file_handle.seek(0, os.SEEK_SET)
+            # Get an iterable XML parser context.
+            xml_parser_context = ElementTree.iterparse(file_handle, events=('start', 'end'))
+            # Turn it into an iterator.
+            xml_parser_context = iter(xml_parser_context)
+            # Check whether this is a valid XML file.
+            try:
+                xml_parser_context.next()
+            except ElementTree.ParseError:
+                raise errors.UnableToParseFile(u'Not a valid Rubanetra file (not XML).')
+
+            # Add ourselves to the parser chain, which will be used in all subsequent
+            # event creation in this parser.
+            parser_chain = self._BuildParserChain(parser_chain)
+
+            # Read from the beginning to process the metadata.
+            file_handle.seek(0, os.SEEK_SET)
+            # Get an iterable XML parser context.
+            xml_parser_context = ElementTree.iterparse(file_handle, events=('start', 'end'))
+            # Turn it into an iterator.
+            xml_parser_context = iter(xml_parser_context)
+            # Get the root element.
+            xml_event, xml_root = xml_parser_context.next()
+
+            for xml_event, xml_elem in xml_parser_context:
+                if xml_event == 'end':
+                    # ... process metadata ...
+                    if xml_elem.tag in rubanetra_metadata_fields:
+                        rubanetra_metadata_dict[xml_elem.tag] = xml_elem.text
+                        rubanetra_metadata_fields.discard(xml_elem.tag)
+                    elif len(rubanetra_metadata_fields) == 0:
+                        xml_elem.clear()
+                        xml_root.clear()
+                        break
+
+                    xml_elem.clear()
+                    xml_root.clear()
+
+            if len(rubanetra_metadata_fields) != 0:
+                raise errors.UnableToParseFile(
+                    u'Unable to verify metadata, required fields {0:s} could not be parsed.'.format(
+                        rubanetra_metadata_fields))
+
+            self.validate_metadata(rubanetra_metadata_dict)
+
+            # Reopen the file handle.
+            file_handle.seek(0, os.SEEK_SET)
+
+            # Get an iterable XML parser context.
+            xml_parser_context = ElementTree.iterparse(file_handle, events=('start', 'end'))
+            # Turn it into an iterator.
+            xml_parser_context = iter(xml_parser_context)
+            # Get the root element.
+            xml_event, xml_root = xml_parser_context.next()
+
+            for xml_event, xml_elem in xml_parser_context:
+                if xml_event == 'end':
+                    # ... process activities ...
+                    if xml_elem.tag == 'activity':
+                        activity_dict_list = self.element_to_dict(xml_elem).pop('activity')
+                        event_objects = self.parse_activity(
+                            merge_list_of_dicts_to_dict(activity_dict_list))
+                        parser_context.ProduceEvents(
+                            event_objects, parser_chain=parser_chain, file_entry=file_entry)
+                    xml_root.clear()
+        finally:
+            if file_handle is not None:
+                file_handle.close()
+
+    def parse_activity(self, activity_dict_merged):
+        """Takes a dictionary resembling an arbitrary activity and parses all
+        fields recursively, in order to process nested activities as well as
+        ordinary leaf values.
+
+        The following cases are currently handled:
+        - Nested and split activities may occur as a list of dictionaries, as
+          long as each activity dict contains an 'activityType'.
+        - Nested activities are allowed to occur under arbitrary key values.
+        - EventObjects are constructed using reflective access to the event
+          class, i.e. each occurring 'activityType' must be mapped via
+          'activity_type_to_class_dict' and the mapped class must have a
+          constructor that can handle the parsing of a single flat activity
+          using a dict.
+        - If an occurring 'activityType' has no mapping within
+          'activity_type_to_class_dict', a BaseActivityEvent will be
+          constructed instead.
+        - Leaf values that do not represent an entire activity will not be
+          modified, but instead passed as constructor argument to the
+          corresponding EventObject implementation.
+
+        :param activity_dict_merged: an arbitrary activity as dictionary,
+            conforming to a certain 'activityType'.
+        :return: a list of EventObjects that could be parsed from the given
+            dictionary, corresponding to the value of 'activityType', if
+            possible. Otherwise an empty list is returned, in case the given
+            dict does not contain a valid activity, or None, in case the given
+            argument is not a dict.
+        """
+        if activity_dict_merged is None:
+            return None
+
+        event_objects = list()
+        if isinstance(activity_dict_merged, list):
+            for v in activity_dict_merged:
+                if isinstance(v, dict) and 'activityType' in v:
+                    if len(v) == 1:
+                        # It is safe to assume that this is a nested and split activity,
+                        # therefore break the loop and parse it. This assumption no
+                        # longer holds when an activity_dict_list contains both a split
+                        # activity and another nested activity_dict. Currently the XML
+                        # parser handles this case by wrapping the nested activity_dict
+                        # in another list.
+                        return self.parse_activity(
+                            merge_list_of_dicts_to_dict(activity_dict_merged))
+                    else:
+                        # This is a nested activity, parse it and save the objects.
+                        child_evt_objs = self.parse_activity(v)
+                        if child_evt_objs is not None:
+                            for e in child_evt_objs:
+                                if isinstance(e, event.EventObject):
+                                    event_objects += [e]
+
+                else:
+                    event_objects += self.parse_activity(v)
+
+            return event_objects
+
+        # Is it a leaf value or an actual activity?
+        if not isinstance(activity_dict_merged, dict):
+            return None
+
+        # It is at least a dict, however, is it an activity or a value?
+        activity_type = activity_dict_merged.get('activityType', None)
+        if activity_type is not None:  # it is an activity
+            event_object_class = activity_type_to_class_dict.get(
+                activity_type, BaseActivityEvent)
+            # TODO: check whether it is an actual class.
+            event_objects = [event_object_class(activity_dict_merged)]
+
+        # Everything that remains may be another activity or an unconsumed
+        # leaf value.
+        for k, v in activity_dict_merged.items():
+            child_evt_objs = self.parse_activity(v)
+            # Currently the key attribute is not used.
+            # TODO: find a way to link the child events to their parent?
+            if child_evt_objs is not None:
+                for e in child_evt_objs:
+                    if isinstance(e, event.EventObject):
+                        event_objects += [e]
+
+        return event_objects
+
+    def parse_timestamp_events(self, activity_dict):
+        """Takes a dictionary of dictionaries that must contain at least two keys:
+        - 'startInstant', a dictionary that corresponds to a serialized Java
+          Instant object
+        - 'endInstant', as above
+
+        This method will produce a dictionary consisting of either one or two
+        JavaInstantEvent objects.
+
+        :param activity_dict: a dictionary containing the 'startInstant' and
+            'endInstant' dictionaries
+        :return: a dictionary containing a single JavaInstantEvent object, if
+            'startInstant' == 'endInstant', or two JavaInstantEvent objects
+            otherwise.
+        """
+        start_instant_dict = merge_list_of_dicts_to_dict(activity_dict.get('startInstant', None))
+        end_instant_dict = merge_list_of_dicts_to_dict(activity_dict.get('endInstant', None))
+
+        if start_instant_dict != end_instant_dict:
+            instant_dict = dict(startInstant=None, endInstant=None)
+            # Interval.
+            start_instant_evt = JavaInstantEvent.from_java_instant_dict(
+                start_instant_dict, EventTimestamp.FIRST_CONNECTED)
+            end_instant_evt = JavaInstantEvent.from_java_instant_dict(
+                end_instant_dict, EventTimestamp.LAST_CONNECTED)
+            instant_dict['startInstant'] = start_instant_evt
+            instant_dict['endInstant'] = end_instant_evt
+            return instant_dict
+        else:
+            return dict(startInstant=JavaInstantEvent.from_java_instant_dict(
+                start_instant_dict, u'Pcap time stamp'))
+
+    def element_to_dict(self, elem):
+        """Internal method to transform an XML node to a dictionary.
+
+        :param elem: the XML node
+        :return: a dictionary containing the values below 'elem', using
+            'elem.tag' as respective keys
+        """
+        return {elem.tag: map(self.element_to_dict, list(elem)) or elem.text}
+
+    def validate_metadata(self, rubanetra_metadata_dict):
+        """Tries to verify that the parsed XML document corresponds to a known
+        version, to prevent potential issues due to version incompatibility.
+        An exception will be raised if such a case is encountered.
+ + :param rubanetra_metadata_dict: a dictionary containing the basic Rubanetra metadata values + """ + if rubanetra_metadata_dict.get('implementationTitle') != u'Rubanetra': + raise errors.UnableToParseFile(u'Unknown Rubanetra implementation title encountered.') + + version = rubanetra_metadata_dict.get('implementationVersion') + if version != self.VERSION: + logging.warning(u'Rubanetra version number mismatch, expected:{0:s}, actual:{1:s}'.format(self.VERSION, version)) + + + def link_activity(self, event_object_from, event_object_to): + """ This method is currently unused, however, in case it is necessary to group multiple events, + a link between those events must be established. Whether a backtracking chain or a forward-chain should be + established depends entirely on the caller. + Currently, the UUID of 'event_object_to' will be appended to the list 'related_activity_uuids' of + 'event_object_from'. + """ + if isinstance(event_object_to, BaseActivityEvent): + event_object_from.related_activity_uuids.append(event_object_to.uuid) + # TODO: else error + + +class BaseActivityEvent(event.EventObject): + def __init__(self, activity_dict, + data_type='java:rubanetra:base_activity'): + """Initializes the base event object. + + Args: + activity_dict: A dictionary containing all related BaseActivity key/value pairs. + """ + super(BaseActivityEvent, self).__init__() + + if activity_dict is None: + raise errors.UnableToParseFile + + self.data_type = data_type + self.activity_type = activity_dict.pop('activityType', 'BaseActivity') + self.description = activity_dict.pop('description', None) + compound_frame_number_dict_list = activity_dict.pop('compoundFrameNumbers', None) + self.compound_frame_number_list = list() + if compound_frame_number_dict_list is not None: + for d in compound_frame_number_dict_list: + for k, v in d.items(): + self.compound_frame_number_list.append(long(v)) # TODO checks + + self.optional_field_dict = merge_list_of_dicts_to_dict(activity_dict.pop('optionalFields', None)) + self.replaced = string_to_boolean(activity_dict.pop('replaced', None)) + self.source_address = activity_dict.pop('sourceAddressAsString', None) + self.destination_address = activity_dict.pop('destinationAddressAsString', None) + + start_instant_dict = merge_list_of_dicts_to_dict(activity_dict.get('startInstant', None)) + end_instant_dict = merge_list_of_dicts_to_dict(activity_dict.get('endInstant', None)) + + start_instant_evt = None + if start_instant_dict != end_instant_dict: + start_instant_evt = JavaInstantEvent.from_java_instant_dict(start_instant_dict, + EventTimestamp.FIRST_CONNECTED) + # interval + end_instant_evt = JavaInstantEvent.from_java_instant_dict(end_instant_dict, + EventTimestamp.LAST_CONNECTED) + self.last_timestamp = end_instant_evt.timestamp if end_instant_evt is not None else None + else: + start_instant_evt = JavaInstantEvent.from_java_instant_dict(start_instant_dict, u'Pcap time stamp') + self.timestamp = self.first_timestamp = start_instant_evt.timestamp if start_instant_evt is not None else None + self.timestamp_desc = start_instant_evt.timestamp_desc + self.related_activity_uuids = list() + + +class PcapActivityEvent(BaseActivityEvent): + def __init__(self, pcap_activity_dict): + super(PcapActivityEvent, self).__init__(pcap_activity_dict, + data_type='java:rubanetra:pcap_activity') + pcap_packet = merge_list_of_dicts_to_dict(pcap_activity_dict.pop('pcapPacket', None)) + if pcap_packet is not None: + self.pcap_total_size = pcap_packet.pop('totalSize', None) + 
self.pcap_frame_number = pcap_packet.pop('frameNumber', None) + self.pcap_packet_wirelen = pcap_packet.pop('packetWirelen', None) + self.pcap_header_count = pcap_packet.pop('headerCount', None) + + +class HttpRequestActivityEvent(BaseActivityEvent): + def __init__(self, http_request_activity_dict): + super(HttpRequestActivityEvent, self).__init__(http_request_activity_dict, + data_type='java:rubanetra:http_request_activity') + self.http_version = http_request_activity_dict.pop('httpVersion', None) + self.server_address = http_request_activity_dict.pop('serverAddress', None) + self.client_address = http_request_activity_dict.pop('clientAddress', None) + self.http_method = http_request_activity_dict.pop('httpMethod', None) + self.http_query_string = http_request_activity_dict.pop('httpQueryString', None) + self.http_query_parameters = http_request_activity_dict.pop('httpQueryParameters', None) + self.http_request_header_dict = http_request_activity_dict.pop('requestHeaderMap', None) + self.url = http_request_activity_dict.pop('url', None) + + http_request = merge_list_of_dicts_to_dict(http_request_activity_dict.pop('httpRequest', None)) + if http_request is not None: + self.orig_http_header = http_request.pop('header', None) + self.content_type = http_request.pop('contentType', None) + self.is_response = http_request.pop('response', None) + self.jnetpcap_http_string = http_request.pop('JNetPcap-HTTP-String', None) + + self.source_address = self.client_address + self.destination_address = self.server_address + + +class HttpResponseActivityEvent(BaseActivityEvent): + def __init__(self, http_response_activity_dict): + super(HttpResponseActivityEvent, self).__init__(http_response_activity_dict, + data_type='java:rubanetra:http_response_activity') + self.http_version = http_response_activity_dict.pop('httpVersion', None) + self.response_status_code = http_response_activity_dict.pop('responseStatusCode', None) + self.response_status_line = http_response_activity_dict.pop('responseStatusLine', None) + self.response_header_dict = http_response_activity_dict.pop('responseHeaderMap', None) + + http_response = merge_list_of_dicts_to_dict(http_response_activity_dict.pop('httpResponse', None)) + + if http_response is not None: + self.orig_http_header = http_response.pop('header', None) + self.content_type = http_response.pop('contentType', None) + self.is_response = http_response.pop('response', None) + self.jnetpcap_http_string = http_response.pop('JNetPcap-HTTP-String', None) + + +class HttpImageActivityEvent(BaseActivityEvent): + def __init__(self, http_image_activity_dict): + super(HttpImageActivityEvent, self).__init__(http_image_activity_dict, + data_type='java:rubanetra:http_image_activity') + + self.image_type = http_image_activity_dict.pop('imageType', None) + self.image_path = http_image_activity_dict.pop('imagePath', None) + + +class DnsActivityEvent(BaseActivityEvent): + def __init__(self, dns_activity_dict): + super(DnsActivityEvent, self).__init__(dns_activity_dict, + data_type='java:rubanetra:dns_activity') + self.question_record_list = dns_activity_dict.pop('questionRecords', None) + self.answer_record_list = dns_activity_dict.pop('answerRecords', None) + self.authority_record_list = dns_activity_dict.pop('authorityRecords', None) + self.additional_record_list = dns_activity_dict.pop('additionalRecords', None) + self.dns_message_header = merge_list_of_dicts_to_dict(dns_activity_dict.pop('dnsMessageHeader', None)) + self.is_response_bool = 
string_to_boolean(dns_activity_dict.pop('response', None)) + + +class ArpActivityEvent(BaseActivityEvent): + def __init__(self, arp_activity_dict): + super(ArpActivityEvent, self).__init__(arp_activity_dict, + data_type='java:rubanetra:arp_activity') + self.hardware_type = arp_activity_dict.pop('hardwareType', None) + self.protocol_type = arp_activity_dict.pop('protocolType', None) + self.hardware_address_length = arp_activity_dict.pop('hardwareAddressLength', None) + self.protocol_address_length = arp_activity_dict.pop('protocolAddressLength', None) + self.sender_mac_address = arp_activity_dict.pop('senderHardwareAddress', None) + self.target_mac_address = arp_activity_dict.pop('targetHardwareAddress', None) + self.sender_protocol_address = arp_activity_dict.pop('senderProtocolAddress', None) + self.target_protocol_address = arp_activity_dict.pop('targetProtocolAddress', None) + self.jnetpcap_arp = arp_activity_dict.pop('arp', None) + + +class DhcpActivityEvent(BaseActivityEvent): + def __init__(self, dhcp_activity_dict): + super(DhcpActivityEvent, self).__init__(dhcp_activity_dict, + data_type='java:rubanetra:dhcp_activity') + self.dhcp_message = dhcp_activity_dict.pop('dhcpMessage', None) + + +class EthernetActivityEvent(BaseActivityEvent): + def __init__(self, ethernet_activity_dict): + super(EthernetActivityEvent, self).__init__(ethernet_activity_dict, + data_type='java:rubanetra:ethernet_activity') + self.source_mac_address = ethernet_activity_dict.pop('sourceMacAddress', None) + self.destination_mac_address = ethernet_activity_dict.pop('destinationMacAddress', None) + self.ethernet_type = ethernet_activity_dict.pop('ethernetType', None) + self.ethernet_type_enum = ethernet_activity_dict.pop('ethernetTypeEnum', None) + self.jnetpcap_ethernet = ethernet_activity_dict.pop('ethernet', None) + + +class FtpActivityEvent(BaseActivityEvent): + def __init__(self, ftp_activity_dict): + super(FtpActivityEvent, self).__init__(ftp_activity_dict, + data_type='java:rubanetra:ftp_activity') + self.ftp_type = ftp_activity_dict.pop('ftpActivityType', None) + self.command = ftp_activity_dict.pop('command', None) + self.reply = ftp_activity_dict.pop('reply', None) + self.list = ftp_activity_dict.pop('list', None) + + +class Icmpv4ActivityEvent(BaseActivityEvent): + def __init__(self, icmpv4_activity_dict): + super(Icmpv4ActivityEvent, self).__init__(icmpv4_activity_dict, + data_type='java:rubanetra:icmpv4_activity') + self.icmp_subtype = icmpv4_activity_dict.pop('icmpSubType', None) + self.icmp_packet = icmpv4_activity_dict.pop('icmpPacket', None) + self.icmp_message = icmpv4_activity_dict.pop('icmpMessage', None) + self.icmp_type = icmpv4_activity_dict.pop('icmpType', None) + self.icmp_code = icmpv4_activity_dict.pop('icmpCode', None) + self.source_address = icmpv4_activity_dict.pop('sourceAddress', None) + self.destination_address = icmpv4_activity_dict.pop('destinationAddress', None) + self.identifier = icmpv4_activity_dict.pop('identifier', None) + self.sequence = icmpv4_activity_dict.pop('sequence', None) + self.jnetpcap_icmp = icmpv4_activity_dict.pop('icmp', None) + + +class Icmpv6ActivityEvent(BaseActivityEvent): + def __init__(self, icmpv6_activity_dict): + super(Icmpv6ActivityEvent, self).__init__(icmpv6_activity_dict, + data_type='java:rubanetra:icmpv6_activity') + self.icmp_subtype = icmpv6_activity_dict.pop('icmpSubType', None) + self.icmp_packet = icmpv6_activity_dict.pop('icmpPacket', None) + self.icmp_message = icmpv6_activity_dict.pop('icmpMessage', None) + self.icmp_type = 
icmpv6_activity_dict.pop('icmpType', None) + self.jnetpcap_icmp = icmpv6_activity_dict.pop('icmp', None) + + +class IpActivityEvent(BaseActivityEvent): + def __init__(self, ip_activity_dict): + super(IpActivityEvent, self).__init__(ip_activity_dict, + data_type='java:rubanetra:ip_activity') + self.version = ip_activity_dict.pop('version', None) + self.protocol = ip_activity_dict.pop('protocol', None) + self.source_address = ip_activity_dict.pop('sourceAddress', None) + self.destination_address = ip_activity_dict.pop('destinationAddress', None) + + +class Ipv4ActivityEvent(BaseActivityEvent): + def __init__(self, ip_activity_dict): + super(Ipv4ActivityEvent, self).__init__(ip_activity_dict, + data_type='java:rubanetra:ipv4_activity') + self.internet_header_length = ip_activity_dict.pop('internetHeaderLength', None) + self.differentiated_services_code_point = ip_activity_dict.pop('differentiatedServicesCodePoint', None) + self.total_length = ip_activity_dict.pop('totalLength', None) + self.identification = ip_activity_dict.pop('identification', None) + self.flags = ip_activity_dict.pop('flags', None) + self.fragment_offset = ip_activity_dict.pop('fragmentOffset', None) + self.time_to_live = ip_activity_dict.pop('timeToLive', None) + self.header_checksum = ip_activity_dict.pop('headerChecksum', None) + self.options = ip_activity_dict.pop('options', None) + self.jnetpcap_ip4 = ip_activity_dict.pop('ipv4', None) + + +class Ipv6ActivityEvent(BaseActivityEvent): + def __init__(self, ip_activity_dict): + super(Ipv6ActivityEvent, self).__init__(ip_activity_dict, + data_type='java:rubanetra:ipv6_activity') + self.traffic_class = ip_activity_dict.pop('trafficClass', None) + self.flow_label = ip_activity_dict.pop('flowLabel', None) + self.payload_length = ip_activity_dict.pop('payloadLength', None) + self.next_header = ip_activity_dict.pop('nextHeader', None) + self.hop_limit = ip_activity_dict.pop('hopLimit', None) + self.jnetpcap_ip6 = ip_activity_dict.pop('ipv6', None) + self.kraken_ip6 = ip_activity_dict.pop('ipv6Packet', None) + + +class MsnActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(MsnActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:msn_activity') + self.account = activity_dict.pop('account', None) + self.chat = activity_dict.pop('chat', None) + + +class NetbiosActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(NetbiosActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:netbios_activity') + self.datagram_packet = activity_dict.pop('datagramPacket', None) + self.name_packet = activity_dict.pop('namePacket', None) + + +class Pop3ActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(Pop3ActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:pop3_activity') + self.sub_type = activity_dict.pop('subType', None) + self.header = activity_dict.pop('header', None) + self.data = activity_dict.pop('data', None) + self.command = activity_dict.pop('command', None) + self.response = activity_dict.pop('response', None) + + +class SmtpCommandActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(SmtpCommandActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:smtp_command_activity') + self.command = activity_dict.pop('command', None) + self.parameter = activity_dict.pop('parameter', None) + + +class SmtpReplyActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(SmtpReplyActivityEvent, 
self).__init__(activity_dict, + data_type='java:rubanetra:smtp_reply_activity') + self.code = activity_dict.pop('code', None) + self.message = activity_dict.pop('message', None) + + +class SmtpSendActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(SmtpSendActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:smtp_send_activity') + self.header = activity_dict.pop('header', None) + self.data = activity_dict.pop('data', None) + + +class Snmpv1ActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(Snmpv1ActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:snmpv1_activity') + self.pdu = activity_dict.pop('pdu', None) + self.source_socket_address = activity_dict.pop('sourceSocketAddress', None) + self.destination_socket_address = activity_dict.pop('destinationSocketAddress', None) + + +class Snmpv2ActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(Snmpv2ActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:snmpv2_activity') + self.pdu = activity_dict.pop('pdu', None) + self.source_socket_address = activity_dict.pop('sourceSocketAddress', None) + self.destination_socket_address = activity_dict.pop('destinationSocketAddress', None) + + +class TcpActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(TcpActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:tcp_activity') + self.source_port = activity_dict.pop('sourcePort', None) + self.destination_port = activity_dict.pop('destinationPort', None) + self.sequence_number = activity_dict.pop('sequenceNumber', None) + self.acknowledge_number = activity_dict.pop('acknowledgeNumber', None) + self.relative_sequence_number = activity_dict.pop('relativeSequenceNumber', None) + self.relative_acknowledge_number = activity_dict.pop('relativeAcknowledgeNumber', None) + self.data_offset = activity_dict.pop('dataOffset', None) + self.control_bits = activity_dict.pop('controlBits', None) + self.window_size = activity_dict.pop('windowSize', None) + self.checksum = activity_dict.pop('checksum', None) + self.urgent_pointer = activity_dict.pop('urgentPointer', None) + self.tcp_length = activity_dict.pop('tcpLength', None) + self.options = activity_dict.pop('options', None) + self.padding = activity_dict.pop('padding', None) + self.syn = activity_dict.pop('syn', None) + self.ack = activity_dict.pop('ack', None) + self.psh = activity_dict.pop('psh', None) + self.fin = activity_dict.pop('fin', None) + self.rst = activity_dict.pop('rst', None) + self.urg = activity_dict.pop('urg', None) + self.direction = activity_dict.pop('direction', None) + self.client_state = activity_dict.pop('clientState', None) + self.server_state = activity_dict.pop('serverState', None) + self.jnetpcap_tcp = activity_dict.pop('tcp', None) + self.source_address = activity_dict.pop('sourceAddress', None) + self.destination_address = activity_dict.pop('destinationAddress', None) + self.source_socket_address = activity_dict.pop('sourceSocketAddress', None) + self.destination_socket_address = activity_dict.pop('destinationSocketAddress', None) + + +class TelnetActivityEvent(BaseActivityEvent): + def __init__(self, activity_dict): + super(TelnetActivityEvent, self).__init__(activity_dict, + data_type='java:rubanetra:telnet_activity') + self.sub_type = activity_dict.pop('subType', None) + self.command = activity_dict.pop('command', None) + self.option = activity_dict.pop('option', None) + self.ansi_mode = 
activity_dict.pop('ansiMode', None)
+        self.arguments = activity_dict.pop('arguments', None)
+        self.text = activity_dict.pop('text', None)
+        self.title = activity_dict.pop('title', None)
+
+
+class TlsActivityEvent(BaseActivityEvent):
+    def __init__(self, activity_dict):
+        super(TlsActivityEvent, self).__init__(
+            activity_dict, data_type='java:rubanetra:tls_activity')
+        self.client_to_server_traffic = activity_dict.pop('clientToServerTraffic', None)
+        self.server_to_client_traffic = activity_dict.pop('serverToClientTraffic', None)
+
+
+class UdpActivityEvent(BaseActivityEvent):
+    def __init__(self, activity_dict):
+        super(UdpActivityEvent, self).__init__(
+            activity_dict, data_type='java:rubanetra:udp_activity')
+        self.source_port = activity_dict.pop('sourcePort', None)
+        self.destination_port = activity_dict.pop('destinationPort', None)
+        self.length = activity_dict.pop('length', None)
+        self.checksum = activity_dict.pop('checksum', None)
+        self.jnetpcap_udp = activity_dict.pop('udp', None)
+        self.source_socket_address = activity_dict.pop('sourceSocketAddress', None)
+        self.destination_socket_address = activity_dict.pop('destinationSocketAddress', None)
+
+
+class OpenSSHActivityEvent(BaseActivityEvent):
+    def __init__(self, activity_dict):
+        super(OpenSSHActivityEvent, self).__init__(
+            activity_dict, data_type='java:rubanetra:open_ssh_activity')
+        self.client_to_server_traffic = activity_dict.pop('clientToServerTraffic', None)
+        self.server_to_client_traffic = activity_dict.pop('serverToClientTraffic', None)
+
+
+class DropboxTlsActivityEvent(BaseActivityEvent):
+    def __init__(self, activity_dict):
+        super(DropboxTlsActivityEvent, self).__init__(
+            activity_dict, data_type='java:rubanetra:dropbox_tls_activity')
+        self.client_address = activity_dict.pop('clientAddress', None)
+        self.server_address = activity_dict.pop('serverAddress', None)
+
+
+class SpiderOakActivityEvent(BaseActivityEvent):
+    def __init__(self, activity_dict):
+        super(SpiderOakActivityEvent, self).__init__(
+            activity_dict, data_type='java:rubanetra:spideroak_activity')
+        self.client_address = activity_dict.pop('clientAddress', None)
+        self.server_address = activity_dict.pop('serverAddress', None)
+
+
+class SkypePayloadActivityEvent(BaseActivityEvent):
+    def __init__(self, activity_dict):
+        super(SkypePayloadActivityEvent, self).__init__(
+            activity_dict, data_type='java:rubanetra:skype_payload_activity')
+        self.source_object_id = activity_dict.pop('sourceObjectId', None)
+        self.destination_object_id = activity_dict.pop('destinationObjectId', None)
+        self.source_host = activity_dict.pop('sourceHost', None)
+        self.destination_host = activity_dict.pop('destinationHost', None)
+
+
+class JavaInstantEvent(TimestampEvent):
+    """Convenience class for a Java Instant-based event."""
+
+    def __init__(self, instant_epoch_seconds, instant_nano, usage,
+                 data_type='java:time:Instant'):
+        """Initializes a Java Instant-based event object.
+
+        Args:
+          instant_epoch_seconds: The Java epoch seconds value (long).
+          instant_nano: The Java nanoseconds value (long); it will be reduced
+                        to microsecond precision.
+          usage: The description of the usage of the instant value.
+          data_type: The event data type. If not set data_type is derived
+                     from DATA_TYPE.
+ """ + super(JavaInstantEvent, self).__init__( + timelib.Timestamp.FromPosixTimeWithMicrosecond(instant_epoch_seconds, instant_nano / 1000), + usage, data_type) + self.instant_epoch_seconds = instant_epoch_seconds + self.instant_nano = instant_nano + self.related_activity_uuids = list() + + @classmethod + def from_java_instant_dict(cls, java_instant_as_dict, usage, data_type='java:time:Instant'): + # TODO: validate fields + instant_epoch_seconds = long(java_instant_as_dict.pop('epochSecond', -1)) + instant_nano = long(java_instant_as_dict.pop('nano', -1)) + + return cls(instant_epoch_seconds, instant_nano, usage, data_type) + +""" FIXME: This method is ineffective for now, because + it is apparently not possible to specify a filter expression that + is based on a boolean value. +""" +def string_to_boolean(s): + """ Returns true, iff s.lower() in ('true', '1') + + :param s: a String representation of a boolean value + :return:true, iff s.lower() in ('true', '1'), false otherwise + """ + #return s.lower() in ('true', '1') + return s + + +def merge_list_of_dicts_to_dict(list_of_dicts): + """ Takes a list of dictionaries and transforms it to a flat dictionary, overwriting duplicate keys in the process. + + :param list_of_dicts: a list of dictionaries + :return: a flat dictionary containing the keys and values of all dictionaries that were previously located inside the + list. If two dictionaries contained the same key, the mapping of the last dictionary that contained that key + will be included, while the older value is discarded. + """ + if list_of_dicts is None or not isinstance(list_of_dicts, list) or isinstance(list_of_dicts, dict): + return list_of_dicts + + return {k: v for d in list_of_dicts for k, v in d.items()} + +# A dictionary of 'activityType' to class mappings. +activity_type_to_class_dict = { + 'ArpActivity': ArpActivityEvent, + 'DhcpActivity': DhcpActivityEvent, + 'DnsActivity': DnsActivityEvent, + 'EthernetActivity': EthernetActivityEvent, + 'FtpActivity': FtpActivityEvent, + 'HttpImageActivity': HttpImageActivityEvent, + 'HttpRequestActivity': HttpRequestActivityEvent, + 'HttpResponseActivity': HttpResponseActivityEvent, + 'Icmpv4Activity': Icmpv4ActivityEvent, + 'Icmpv6Activity': Icmpv6ActivityEvent, + 'IpActivity': IpActivityEvent, + 'Ipv4Activity': Ipv4ActivityEvent, + 'Ipv6Activity': Ipv6ActivityEvent, + 'MsnActivity': MsnActivityEvent, + 'NetbiosActivity': NetbiosActivityEvent, + 'PcapActivity': PcapActivityEvent, + 'Pop3Activity': Pop3ActivityEvent, + 'SmtpCommandActivity': SmtpCommandActivityEvent, + 'SmtpReplyActivity': SmtpReplyActivityEvent, + 'SmtpSendActivity': SmtpSendActivityEvent, + 'TcpActivity': TcpActivityEvent, + 'TelnetActivity': TelnetActivityEvent, + 'TlsActivity': TlsActivityEvent, + 'UdpActivity': UdpActivityEvent, + 'OpenSSHActivity': OpenSSHActivityEvent, + 'DropboxTlsActivity': DropboxTlsActivityEvent, + 'SpiderOakActivity': SpiderOakActivityEvent, + 'SkypePayloadActivity': SkypePayloadActivityEvent} + +manager.ParsersManager.RegisterParser(RubanetraXmlParser) diff --git a/plaso/parsers/selinux.py b/plaso/parsers/selinux.py new file mode 100644 index 0000000..c963338 --- /dev/null +++ b/plaso/parsers/selinux.py @@ -0,0 +1,175 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a SELinux log file parser in plaso.
+
+  Information updated 16 January 2013.
+
+  The parser applies to the SELinux 'audit.log' file.
+  An example log file entry is the following:
+
+  type=AVC msg=audit(1105758604.519:420): avc: denied { getattr } for pid=5962
+  comm="httpd" path="/home/auser/public_html" dev=sdb2 ino=921135
+
+  The parser will extract the 'type' value, the timestamp and the 'pid'.
+  In the previous example, the timestamp is '1105758604.519', and it
+  represents the EPOCH time (seconds since Jan 1, 1970) plus the
+  milliseconds past that second (epoch: 1105758604, milliseconds: 519).
+
+  The number after the timestamp (420 in the example) is a 'serial number'
+  that can be used to correlate multiple logs generated from the same event.
+
+  References:
+  http://selinuxproject.org/page/NB_AL
+  http://blog.commandlinekungfu.com/2010/08/episode-106-epoch-fail.html
+  http://www.redhat.com/promo/summit/2010/presentations/
+  taste_of_training/Summit_2010_SELinux.pdf
+"""
+
+import logging
+import re
+
+from plaso.events import text_events
+from plaso.lib import errors
+from plaso.lib import lexer
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class SELinuxLineEvent(text_events.TextEvent):
+  """Convenience class for a SELinux log line event."""
+  DATA_TYPE = 'selinux:line'
+
+
+class SELinuxParser(text_parser.SlowLexicalTextParser):
+  """Parses SELinux audit log files."""
+
+  NAME = 'selinux'
+  DESCRIPTION = u'Parser for SELinux audit log files.'
+
+  PID_RE = re.compile(r'pid=([0-9]+)[\s]+', re.DOTALL)
+
+  tokens = [
+      # Skipping empty lines, both EOLs are considered here and in other states.
+      lexer.Token('INITIAL', r'^\r?\n', '', ''),
+      # FSM entry point ('type=anything msg=audit'), critical to recognize a
+      # SELinux audit file and used to retrieve the audit type. From there two
+      # next states are possible: TIME or failure, since TIME state is required.
+      # An empty type is not accepted and it will cause a failure.
+      # Examples:
+      #   type=SYSCALL msg=audit(...): ...
+      #   type=UNKNOWN[1323] msg=audit(...): ...
+      lexer.Token(
+          'INITIAL', r'^type=([\w]+(\[[0-9]+\])?)[ \t]+msg=audit', 'ParseType',
+          'TIMESTAMP'),
+      lexer.Token(
+          'TIMESTAMP', r'\(([0-9]+)\.([0-9]+):([0-9]*)\):', 'ParseTime',
+          'STRING'),
+      # Get the log entry description and stay in the same state.
+      lexer.Token('STRING', r'[ \t]*([^\r\n]+)', 'ParseString', ''),
+      # Entry parsed. Note that an empty description is managed and it will not
+      # raise a parsing failure.
+      lexer.Token('STRING', r'[ \t]*\r?\n', 'ParseMessage', 'INITIAL'),
+      # The entry is not formatted as expected, so the parsing failed.
+      lexer.Token('.', '([^\r\n]+)\r?\n', 'ParseFailed', 'INITIAL')
+  ]
+
+  def __init__(self):
+    """Initializes a parser object."""
+    # Set local_zone to false, since timestamps are UTC.
+ super(SELinuxParser, self).__init__(local_zone=False) + self.attributes = {'audit_type': '', 'pid': '', 'body': ''} + self.timestamp = 0 + + def ParseType(self, match=None, **unused_kwargs): + """Parse the audit event type. + + Args: + match: The regular expression match object. + """ + self.attributes['audit_type'] = match.group(1) + + def ParseTime(self, match=None, **unused_kwargs): + """Parse the log timestamp. + + Args: + match: The regular expression match object. + """ + # TODO: do something with match.group(3) ? + try: + number_of_seconds = int(match.group(1), 10) + timestamp = timelib.Timestamp.FromPosixTime(number_of_seconds) + timestamp += int(match.group(2), 10) * 1000 + self.timestamp = timestamp + except ValueError as exception: + logging.error( + u'Unable to retrieve timestamp with error: {0:s}'.format(exception)) + self.timestamp = 0 + raise lexer.ParseError(u'Not a valid timestamp.') + + def ParseString(self, match=None, **unused_kwargs): + """Add a string to the body attribute. + + This method extends the one from TextParser slightly, + searching for the 'pid=[0-9]+' value inside the message body. + + Args: + match: The regular expression match object. + """ + try: + self.attributes['body'] += match.group(1) + # TODO: fix it using lexer or remove pid parsing. + # Indeed this is something that lexer is able to manage, but 'pid' field + # is non positional: so, by doing the following step, the FSM is kept + # simpler. Left the 'to do' as a reminder of possible refactoring. + pid_search = self.PID_RE.search(self.attributes['body']) + if pid_search: + self.attributes['pid'] = pid_search.group(1) + except IndexError: + self.attributes['body'] += match.group(0).strip('\n') + + def ParseFailed(self, **unused_kwargs): + """Entry parsing failed callback.""" + raise lexer.ParseError(u'Unable to parse SELinux log line.') + + def ParseLine(self, parser_context): + """Parse a single line from the SELinux audit file. + + This method extends the one from TextParser slightly, creating a + SELinux event with the timestamp (UTC) taken from log entries. + + Args: + parser_context: A parser context object (instance of ParserContext). + + Returns: + An event object (instance of EventObject) that is constructed + from the selinux entry. + """ + if not self.timestamp: + raise errors.TimestampNotCorrectlyFormed( + u'Unable to parse entry, timestamp not defined.') + offset = getattr(self, 'entry_offset', 0) + event_object = SELinuxLineEvent(self.timestamp, offset, self.attributes) + self.timestamp = 0 + return event_object + + +manager.ParsersManager.RegisterParser(SELinuxParser) diff --git a/plaso/parsers/selinux_test.py b/plaso/parsers/selinux_test.py new file mode 100644 index 0000000..52cff16 --- /dev/null +++ b/plaso/parsers/selinux_test.py @@ -0,0 +1,96 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the selinux log file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import selinux as selinux_formatter +from plaso.parsers import selinux +from plaso.parsers import test_lib + + +__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)' + + +class SELinuxUnitTest(test_lib.ParserTestCase): + """Tests for the selinux log file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = selinux.SELinuxParser() + + def testParse(self): + """Tests the Parse function.""" + knowledge_base_values = {'year': 2013} + test_file = self._GetTestFilePath(['selinux.log']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + # Test case: normal entry. + event_object = event_objects[0] + + self.assertEquals(event_object.timestamp, 1337845201174000) + + expected_msg = ( + u'[audit_type: LOGIN, pid: 25443] pid=25443 uid=0 old ' + u'auid=4294967295 new auid=0 old ses=4294967295 new ses=1165') + expected_msg_short = ( + u'[audit_type: LOGIN, pid: 25443] pid=25443 uid=0 old ' + u'auid=4294967295 new auid=...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # Test case: short date. + event_object = event_objects[1] + + self.assertEquals(event_object.timestamp, 1337845201000000) + + expected_string = u'[audit_type: SHORTDATE] check rounding' + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + # Test case: no msg. + event_object = event_objects[2] + + self.assertEquals(event_object.timestamp, 1337845222174000) + + expected_string = u'[audit_type: NOMSG]' + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + # Test case: under score. + event_object = event_objects[3] + + self.assertEquals(event_object.timestamp, 1337845666174000) + + expected_msg = ( + u'[audit_type: UNDER_SCORE, pid: 25444] pid=25444 uid=0 old ' + u'auid=4294967295 new auid=54321 old ses=4294967295 new ses=1166') + expected_msg_short = ( + u'[audit_type: UNDER_SCORE, pid: 25444] pid=25444 uid=0 old ' + u'auid=4294967295 new...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/shared/__init__.py b/plaso/parsers/shared/__init__.py new file mode 100644 index 0000000..f462564 --- /dev/null +++ b/plaso/parsers/shared/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/plaso/parsers/shared/shell_items.py b/plaso/parsers/shared/shell_items.py
new file mode 100644
index 0000000..f3166cb
--- /dev/null
+++ b/plaso/parsers/shared/shell_items.py
@@ -0,0 +1,188 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for Windows NT shell items."""
+
+import pyfwsi
+
+from plaso.events import shell_item_events
+from plaso.lib import eventdata
+from plaso.winnt import shell_folder_ids
+
+
+if pyfwsi.get_version() < '20140714':
+  raise ImportWarning(
+      u'Shell item support functions require at least pyfwsi 20140714.')
+
+
+class ShellItemsParser(object):
+  """Parses Windows NT shell items."""
+
+  NAME = 'shell_items'
+
+  def __init__(self, origin):
+    """Initializes the parser.
+
+    Args:
+      origin: A string containing the origin of the event (event source).
+    """
+    super(ShellItemsParser, self).__init__()
+    self._origin = origin
+    self._path_segments = []
+
+  def _BuildParserChain(self, parser_chain=None):
+    """Returns the parser chain with the addition of the current parser.
+
+    Args:
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Returns:
+      The parser chain, with the addition of the current parser.
+    """
+    if not parser_chain:
+      return self.NAME
+
+    return u'/'.join([parser_chain, self.NAME])
+
+  def _ParseShellItem(
+      self, parser_context, shell_item, file_entry=None, parser_chain=None):
+    """Parses a shell item.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      shell_item: the shell item (instance of pyfwsi.item).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+ """ + path_segment = None + + if isinstance(shell_item, pyfwsi.root_folder): + description = shell_folder_ids.DESCRIPTIONS.get( + shell_item.shell_folder_identifier, None) + + if description: + path_segment = description + else: + path_segment = u'{{{0:s}}}'.format(shell_item.shell_folder_identifier) + + elif isinstance(shell_item, pyfwsi.volume): + if shell_item.name: + path_segment = shell_item.name + elif shell_item.identifier: + path_segment = u'{{{0:s}}}'.format(shell_item.identifier) + + elif isinstance(shell_item, pyfwsi.file_entry): + long_name = u'' + localized_name = u'' + file_reference = u'' + for extension_block in shell_item.extension_blocks: + if isinstance(extension_block, pyfwsi.file_entry_extension): + long_name = extension_block.long_name + localized_name = extension_block.localized_name + file_reference = extension_block.file_reference + if file_reference: + file_reference = u'{0:d}-{1:d}'.format( + file_reference & 0xffffffffffff, file_reference >> 48) + + fat_date_time = extension_block.get_creation_time_as_integer() + if fat_date_time: + event_object = shell_item_events.ShellItemFileEntryEvent( + fat_date_time, eventdata.EventTimestamp.CREATION_TIME, + shell_item.name, long_name, localized_name, file_reference, + self._origin) + parser_context.ProduceEvent( + event_object, file_entry=file_entry, + parser_chain=parser_chain) + + fat_date_time = extension_block.get_access_time_as_integer() + if fat_date_time: + event_object = shell_item_events.ShellItemFileEntryEvent( + fat_date_time, eventdata.EventTimestamp.ACCESS_TIME, + shell_item.name, long_name, localized_name, file_reference, + self._origin) + parser_context.ProduceEvent( + event_object, file_entry=file_entry, + parser_chain=parser_chain) + + fat_date_time = shell_item.get_modification_time_as_integer() + if fat_date_time: + event_object = shell_item_events.ShellItemFileEntryEvent( + fat_date_time, eventdata.EventTimestamp.MODIFICATION_TIME, + shell_item.name, long_name, localized_name, file_reference, + self._origin) + parser_context.ProduceEvent( + event_object, file_entry=file_entry, + parser_chain=parser_chain) + + if long_name: + path_segment = long_name + elif shell_item.name: + path_segment = shell_item.name + + elif isinstance(shell_item, pyfwsi.network_location): + if shell_item.location: + path_segment = shell_item.location + + if path_segment is None and shell_item.class_type == 0x00: + # TODO: check for signature 0x23febbee + pass + + if path_segment is None: + path_segment = u'UNKNOWN: 0x{0:02x}'.format(shell_item.class_type) + + self._path_segments.append(path_segment) + + def CopyToPath(self): + """Copies the shell items to a path. + + Returns: + A Unicode string containing the converted shell item list path or None. + """ + if not self._path_segments: + return + + return u', '.join(self._path_segments) + + def Parse( + self, parser_context, byte_stream, codepage='cp1252', + file_entry=None, parser_chain=None): + """Parses the shell items from the byte stream. + + Args: + parser_context: A parser context object (instance of ParserContext). + byte_stream: a string holding the shell items data. + codepage: Optional byte stream codepage. The default is cp1252. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. 
+    """
+    self._path_segments = []
+    shell_item_list = pyfwsi.item_list()
+    shell_item_list.copy_from_byte_stream(byte_stream, ascii_codepage=codepage)
+
+    # Add ourselves to the parser chain, so it is used for subsequent object
+    # creation.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    for shell_item in shell_item_list.items:
+      self._ParseShellItem(
+          parser_context, shell_item, file_entry=file_entry,
+          parser_chain=parser_chain)
diff --git a/plaso/parsers/skydrivelog.py b/plaso/parsers/skydrivelog.py
new file mode 100644
index 0000000..f00cd57
--- /dev/null
+++ b/plaso/parsers/skydrivelog.py
@@ -0,0 +1,209 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a SkyDrive log file parser in plaso."""
+
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class SkyDriveLogEvent(time_events.TimestampEvent):
+  """Convenience class for a SkyDrive log line event."""
+  DATA_TYPE = 'skydrive:log:line'
+
+  def __init__(self, timestamp, offset, source_code, log_level, text):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The plaso timestamp value, in microseconds since epoch.
+      offset: The offset of the event within the log file.
+      source_code: Details of the source code file generating the event.
+      log_level: The log level used for the event.
+      text: The log message.
+    """
+    super(SkyDriveLogEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.ADDED_TIME)
+    self.offset = offset
+    self.source_code = source_code
+    self.log_level = log_level
+    self.text = text
+
+
+class SkyDriveLogParser(text_parser.PyparsingSingleLineTextParser):
+  """Parses SkyDrive log files."""
+
+  NAME = 'skydrive_log'
+  DESCRIPTION = u'Parser for OneDrive (or SkyDrive) log files.'
+
+  ENCODING = 'UTF-8-SIG'
+
+  # Common SDL (SkyDriveLog) pyparsing objects.
+  SDL_COLON = pyparsing.Literal(u':')
+  SDL_EXCLAMATION = pyparsing.Literal(u'!')
+
+  # Timestamp (08-01-2013 21:22:28.999).
+  SDL_TIMESTAMP = (
+      text_parser.PyparsingConstants.DATE_REV +
+      text_parser.PyparsingConstants.TIME_MSEC).setResultsName('timestamp')
+
+  # SkyDrive source code pyparsing structures.
+  SDL_SOURCE_CODE = pyparsing.Combine(
+      pyparsing.CharsNotIn(u':') +
+      SDL_COLON +
+      text_parser.PyparsingConstants.INTEGER +
+      SDL_EXCLAMATION +
+      pyparsing.Word(pyparsing.printables)).setResultsName('source_code')
+
+  # SkyDriveLogLevel pyparsing structures.
+  SDL_LOG_LEVEL = (
+      pyparsing.Literal(u'(').suppress() +
+      pyparsing.SkipTo(u')').setResultsName('log_level') +
+      pyparsing.Literal(u')').suppress())
+
+  # SkyDrive line pyparsing structure.
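+  # A line matching the structures above looks, for instance, like the
+  # following (an illustrative sample reconstructed from the grammar and the
+  # expected strings in the tests, not a verbatim line from the test data):
+  #   08-01-2013 21:22:28.999 global.cpp:626!logVersionInfo (DETAIL): ...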
+  SDL_LINE = (
+      SDL_TIMESTAMP + SDL_SOURCE_CODE + SDL_LOG_LEVEL +
+      SDL_COLON + pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))
+
+  # Sometimes the timestamped log line is followed by an empty line,
+  # then by a file name plus other data and finally by another empty
+  # line. It can also happen that a log line is split in two parts.
+  # These lines will not be discarded; an event will be generated
+  # ad-hoc (see source), based on the last complete event if available.
+  SDL_NO_HEADER_SINGLE_LINE = (
+      pyparsing.Optional(pyparsing.Literal(u'->').suppress()) +
+      pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text'))
+
+  # Define the available log line structures.
+  LINE_STRUCTURES = [
+      ('logline', SDL_LINE),
+      ('no_header_single_line', SDL_NO_HEADER_SINGLE_LINE),
+  ]
+
+  def __init__(self):
+    """Initializes a parser object."""
+    super(SkyDriveLogParser, self).__init__()
+    self.offset = 0
+    self.last_event = None
+
+  def VerifyStructure(self, parser_context, line):
+    """Verify that this file is a SkyDrive log file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      line: A single line from the text file.
+
+    Returns:
+      True if this is the correct parser, False otherwise.
+    """
+    structure = self.SDL_LINE
+    parsed_structure = None
+    timestamp = None
+    try:
+      parsed_structure = structure.parseString(line)
+    except pyparsing.ParseException:
+      logging.debug(u'Not a SkyDrive log file')
+      return False
+    else:
+      timestamp = self._GetTimestamp(parsed_structure.timestamp)
+    if not timestamp:
+      logging.debug(u'Not a SkyDrive log file, invalid timestamp {0:s}'.format(
+          parsed_structure.timestamp))
+      return False
+    return True
+
+  def ParseRecord(self, parser_context, key, structure):
+    """Parse each record structure and return an EventObject if applicable.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: An identification string indicating the name of the parsed
+           structure.
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+
+    Returns:
+      An event object (instance of EventObject) or None.
+    """
+    if key == 'logline':
+      return self._ParseLogline(structure)
+    elif key == 'no_header_single_line':
+      return self._ParseNoHeaderSingleLine(structure)
+    else:
+      logging.warning(
+          u'Unable to parse record, unknown structure: {0:s}'.format(key))
+
+  def _ParseLogline(self, structure):
+    """Parse a logline and store appropriate attributes."""
+    timestamp = self._GetTimestamp(structure.timestamp)
+    if not timestamp:
+      logging.debug(u'Invalid timestamp {0:s}'.format(structure.timestamp))
+      return
+    evt = SkyDriveLogEvent(
+        timestamp, self.offset, structure.source_code, structure.log_level,
+        structure.text)
+    self.last_event = evt
+    return evt
+
+  def _ParseNoHeaderSingleLine(self, structure):
+    """Parse an isolated line and store appropriate attributes."""
+    if not self.last_event:
+      logging.debug(u'SkyDrive, found isolated line with no previous events')
+      return
+    evt = SkyDriveLogEvent(
+        self.last_event.timestamp, self.last_event.offset, None, None,
+        structure.text)
+    # TODO: consider refactoring the handling of the non-header lines.
+    self.last_event = None
+    return evt
+
+  def _GetTimestamp(self, timestamp_pypr):
+    """Gets a timestamp from a pyparsing ParseResults timestamp.
+
+    This is a timestamp as returned by using the
+    text_parser.PyparsingConstants structures, e.g.:
+    [[8, 1, 2013], [21, 22, 28], 999]
+
+    Args:
+      timestamp_pypr: The pyparsing ParseResults object.
+
+    Returns:
+      timestamp: A plaso timelib timestamp (microseconds since epoch) or 0
+                 on error.
+    """
+    timestamp = 0
+    try:
+      month, day, year = timestamp_pypr[0]
+      hour, minute, second = timestamp_pypr[1]
+      millisecond = timestamp_pypr[2]
+      timestamp = timelib.Timestamp.FromTimeParts(
+          year, month, day, hour, minute, second,
+          microseconds=(millisecond * 1000))
+    except ValueError:
+      timestamp = 0
+    return timestamp
+
+
+manager.ParsersManager.RegisterParser(SkyDriveLogParser)
diff --git a/plaso/parsers/skydrivelog_test.py b/plaso/parsers/skydrivelog_test.py
new file mode 100644
index 0000000..2e7ef4b
--- /dev/null
+++ b/plaso/parsers/skydrivelog_test.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the skydrivelog parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import skydrivelog as skydrivelog_formatter
+from plaso.lib import timelib_test
+from plaso.parsers import skydrivelog
+from plaso.parsers import test_lib
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class SkyDriveLogUnitTest(test_lib.ParserTestCase):
+  """Tests for the skydrivelog parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = skydrivelog.SkyDriveLogParser()
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath(['skydrive.log'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEquals(len(event_objects), 18)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-08-01 21:22:28.999')
+    self.assertEquals(event_objects[0].timestamp, expected_timestamp)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-08-01 21:22:29.702')
+    self.assertEquals(event_objects[1].timestamp, expected_timestamp)
+    self.assertEquals(event_objects[2].timestamp, expected_timestamp)
+    self.assertEquals(event_objects[3].timestamp, expected_timestamp)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-08-01 21:22:58.344')
+    self.assertEquals(event_objects[4].timestamp, expected_timestamp)
+    self.assertEquals(event_objects[5].timestamp, expected_timestamp)
+
+    expected_msg = (
+        u'[global.cpp:626!logVersionInfo] (DETAIL) 17.0.2011.0627 (Ship)')
+    expected_msg_short = (
+        u'17.0.2011.0627 (Ship)')
+    self._TestGetMessageStrings(
+        event_objects[0], expected_msg, expected_msg_short)
+
+    expected_msg = (
+        u'SyncToken = LM%3d12345678905670%3bID%3d1234567890E059C0!'
+        u'103%3bLR%3d12345678905623%3aEP%3d2')
+    expected_msg_short = (
+        u'SyncToken = LM%3d12345678905670%3bID%3d1234567890E059C0!'
+ u'103%3bLR%3d1234567890...') + self._TestGetMessageStrings( + event_objects[3], expected_msg, expected_msg_short) + + expected_string = ( + u'SyncToken = Not a sync token (\xe0\xe8\xec\xf2\xf9)!') + self._TestGetMessageStrings( + event_objects[17], expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/skydrivelogerr.py b/plaso/parsers/skydrivelogerr.py new file mode 100644 index 0000000..3bde927 --- /dev/null +++ b/plaso/parsers/skydrivelogerr.py @@ -0,0 +1,257 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains SkyDrive error log file parser in plaso.""" + +import logging + +import pyparsing + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import manager +from plaso.parsers import text_parser + + +__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)' + + +class SkyDriveLogErrorEvent(time_events.TimestampEvent): + """Convenience class for a SkyDrive error log line event.""" + DATA_TYPE = 'skydrive:error:line' + + def __init__(self, timestamp, module, source_code, text, detail): + """Initializes the event object. + + Args: + timestamp: Milliseconds since epoch in UTC. + module: The module name that generated the log line. + source_code: Logging source file and line number. + text: The error text message. + detail: The error details. + """ + super(SkyDriveLogErrorEvent, self).__init__( + timestamp, eventdata.EventTimestamp.ADDED_TIME) + self.module = module + self.source_code = source_code + self.text = text + self.detail = detail + + +class SkyDriveLogErrorParser(text_parser.PyparsingMultiLineTextParser): + """Parse SkyDrive error log files.""" + + NAME = 'skydrive_log_error' + DESCRIPTION = u'Parser for OneDrive (or SkyDrive) error log files.' + + ENCODING = 'utf-8' + + # Common SDE (SkyDriveError) structures. + INTEGER_CAST = text_parser.PyParseIntCast + HYPHEN = text_parser.PyparsingConstants.HYPHEN + TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS + TIME_MSEC = text_parser.PyparsingConstants.TIME_MSEC + MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(INTEGER_CAST) + COMMA = pyparsing.Literal(u',').suppress() + DOT = pyparsing.Literal(u'.').suppress() + IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress() + + # Header line timestamp (2013-07-25-160323.291). + SDE_HEADER_TIMESTAMP = pyparsing.Group( + text_parser.PyparsingConstants.DATE.setResultsName('date') + HYPHEN + + TWO_DIGITS.setResultsName('hh') + TWO_DIGITS.setResultsName('mm') + + TWO_DIGITS.setResultsName('ss') + DOT + + MSEC.setResultsName('ms')).setResultsName('hdr_timestamp') + + # Line timestamp (07-25-13,16:06:31.820). 
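+  # The year is stored with two digits only; _GetTimestampFromLine below
+  # adds 2000 to it, so the line above is read as 2013-07-25 16:06:31.820.
+  # Roughly, assuming the PyparsingConstants semantics, matching yields:
+  #
+  #   SDE_TIMESTAMP.parseString(u'07-25-13,16:06:31.820')
+  #   # month=7, day=25, year_short=13, time=[[16, 6, 31], 820]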
+ SDE_TIMESTAMP = ( + TWO_DIGITS.setResultsName('month') + HYPHEN + + TWO_DIGITS.setResultsName('day') + HYPHEN + + TWO_DIGITS.setResultsName('year_short') + COMMA + + TIME_MSEC.setResultsName('time')).setResultsName('timestamp') + + # Header start. + SDE_HEADER_START = ( + pyparsing.Literal(u'######').suppress() + + pyparsing.Literal(u'Logging started.').setResultsName('log_start')) + + # Multiline entry end marker, matched from right to left. + SDE_ENTRY_END = pyparsing.StringEnd() | SDE_HEADER_START | SDE_TIMESTAMP + + # SkyDriveError line pyparsing structure. + SDE_LINE = ( + SDE_TIMESTAMP + COMMA + + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + + pyparsing.CharsNotIn(u',').setResultsName('module') + COMMA + + pyparsing.CharsNotIn(u',').setResultsName('source_code') + COMMA + + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA + + pyparsing.Optional(pyparsing.CharsNotIn(u',').setResultsName('text')) + + COMMA + pyparsing.SkipTo(SDE_ENTRY_END).setResultsName('detail') + + pyparsing.lineEnd()) + + # SkyDriveError header pyparsing structure. + SDE_HEADER = ( + SDE_HEADER_START + + pyparsing.Literal(u'Version=').setResultsName('ver_str') + + pyparsing.Word(pyparsing.nums + u'.').setResultsName('ver_num') + + pyparsing.Literal(u'StartSystemTime:').suppress() + + SDE_HEADER_TIMESTAMP + + pyparsing.Literal(u'StartLocalTime:').setResultsName('lt_str') + + pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName('details') + + pyparsing.lineEnd()) + + # Define the available log line structures. + LINE_STRUCTURES = [ + ('logline', SDE_LINE), + ('header', SDE_HEADER) + ] + + def __init__(self): + """Initializes a parser object.""" + super(SkyDriveLogErrorParser, self).__init__() + self.use_local_zone = False + + def VerifyStructure(self, parser_context, line): + """Verify that this file is a SkyDrive Error log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + line: A single line from the text file. + + Returns: + True if this is the correct parser, False otherwise. + """ + try: + parsed_structure = self.SDE_HEADER.parseString(line) + except pyparsing.ParseException: + logging.debug(u'Not a SkyDrive Error log file') + return False + timestamp = self._GetTimestampFromHeader(parsed_structure.hdr_timestamp) + if not timestamp: + logging.debug( + u'Not a SkyDrive Error log file, invalid timestamp {0:s}'.format( + parsed_structure.timestamp)) + return False + return True + + def ParseRecord(self, parser_context, key, structure): + """Parse each record structure and return an EventObject if applicable. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: An identification string indicating the name of the parsed + structure. + structure: A pyparsing.ParseResults object from a line in the + log file. + + Returns: + An event object (instance of EventObject) or None. + """ + if key == 'logline': + return self._ParseLine(structure) + elif key == 'header': + return self._ParseHeader(structure) + else: + logging.warning( + u'Unable to parse record, unknown structure: {0:s}'.format(key)) + + def _ParseLine(self, structure): + """Parse a logline and store appropriate attributes.""" + timestamp = self._GetTimestampFromLine(structure.timestamp) + if not timestamp: + logging.debug(u'SkyDriveLogError invalid timestamp {0:s}'.format( + structure.timestamp)) + return + # Replace newlines with spaces in structure.detail to preserve output. 
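+    # The detail field can span multiple lines up to SDE_ENTRY_END; a
+    # multi-line value would break line oriented output, hence the replace
+    # below.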
+ return SkyDriveLogErrorEvent( + timestamp, structure.module, structure.source_code, + structure.text, structure.detail.replace(u'\n', u' ')) + + def _ParseHeader(self, structure): + """Parse header lines and store appropriate attributes. + + [u'Logging started.', u'Version=', u'17.0.2011.0627', + [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'
']
+
+    Args:
+      structure: The parsed structure.
+
+    Returns:
+      An event object (instance of SkyDriveLogErrorEvent) or None.
+    """
+    timestamp = self._GetTimestampFromHeader(structure.hdr_timestamp)
+    if not timestamp:
+      logging.debug(
+          u'SkyDriveLogError invalid timestamp {0:s}'.format(
+              structure.hdr_timestamp))
+      return
+    text = u'{0:s} {1:s} {2:s}'.format(
+        structure.log_start, structure.ver_str, structure.ver_num)
+    detail = u'{0:s} {1:s}'.format(structure.lt_str, structure.details)
+    return SkyDriveLogErrorEvent(
+        timestamp, None, None, text, detail)
+
+  def _GetTimestampFromHeader(self, structure):
+    """Gets a timestamp from the header structure.
+
+    The following is an example of the expected timestamp structure:
+    [[2013, 7, 25], 16, 3, 23, 291]
+
+    Args:
+      structure: The parsed structure, which should be a timestamp.
+
+    Returns:
+      timestamp: A plaso timelib timestamp or 0 on error.
+    """
+    year, month, day = structure.date
+    hour = structure.get('hh', 0)
+    minute = structure.get('mm', 0)
+    second = structure.get('ss', 0)
+    microsecond = structure.get('ms', 0) * 1000
+
+    return timelib.Timestamp.FromTimeParts(
+        year, month, day, hour, minute, second, microseconds=microsecond)
+
+  def _GetTimestampFromLine(self, structure):
+    """Gets a timestamp from the line structure.
+
+    The following is an example of the expected timestamp structure:
+    [7, 25, 13, [16, 3, 24], 649]
+
+    Args:
+      structure: The parsed structure.
+
+    Returns:
+      timestamp: A plaso timelib timestamp or 0 on error.
+    """
+    hour, minute, second = structure.time[0]
+    microsecond = structure.time[1] * 1000
+    # TODO: Verify if timestamps are locale dependent.
+    year = structure.get('year_short', 0)
+    month = structure.get('month', 0)
+    day = structure.get('day', 0)
+    if year < 0 or not month or not day:
+      return 0
+
+    year += 2000
+
+    return timelib.Timestamp.FromTimeParts(
+        year, month, day, hour, minute, second, microseconds=microsecond)
+
+
+manager.ParsersManager.RegisterParser(SkyDriveLogErrorParser)
diff --git a/plaso/parsers/skydrivelogerr_test.py b/plaso/parsers/skydrivelogerr_test.py
new file mode 100644
index 0000000..7cb14d6
--- /dev/null
+++ b/plaso/parsers/skydrivelogerr_test.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the SkyDriveLogErr log parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import skydrivelogerr as skydrivelogerr_formatter +from plaso.lib import timelib_test +from plaso.parsers import skydrivelogerr as skydrivelogerr_parser +from plaso.parsers import test_lib + + +__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)' + + +class SkyDriveLogErrorUnitTest(test_lib.ParserTestCase): + """A unit test for the SkyDriveLogErr parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = skydrivelogerr_parser.SkyDriveLogErrorParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath([u'skydriveerr.log']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 19) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-25 16:03:23.291') + self.assertEquals(event_objects[0].timestamp, expected_timestamp) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-25 16:03:24.649') + self.assertEquals(event_objects[1].timestamp, expected_timestamp) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-08-01 21:27:44.124') + self.assertEquals(event_objects[18].timestamp, expected_timestamp) + + expected_detail = ( + u'StartLocalTime: 2013-07-25-180323.291 PID=0x8f4 TID=0x718 ' + u'ContinuedFrom=') + self.assertEquals(event_objects[0].detail, expected_detail) + + expected_string = ( + u'Logging started. Version= 17.0.2011.0627 ({0:s})').format( + expected_detail) + + expected_string_short = u'Logging started. Version= 17.0.2011.0627' + self._TestGetMessageStrings( + event_objects[0], expected_string, expected_string_short) + + expected_string = ( + u'[AUTH authapi.cpp(280)] Sign in failed : ' + 'DRX_E_AUTH_NO_VALID_CREDENTIALS') + expected_string_short = u'Sign in failed : DRX_E_AUTH_NO_VALID_CREDENTIALS' + self._TestGetMessageStrings( + event_objects[1], expected_string, expected_string_short) + + expected_string = ( + u'[WNS absconn.cpp(177)] Received data from server ' + '(dwID=0x0;dwSize=0x3e;pbData=PNG 9 CON 48 ' + '44)') + expected_string_short = u'Received data from server' + self._TestGetMessageStrings( + event_objects[18], expected_string, expected_string_short) + + def testParseUnicode(self): + """Tests the Parse function on Unicode data.""" + test_file = self._GetTestFilePath([u'skydriveerr-unicode.log']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 19) + + # TODO: check if this test passes because the encoding on my system + # is UTF-8. + expected_text = ( + u'No node found named Passport-Jméno-člena') + self.assertEquals(event_objects[3].text, expected_text) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite.py b/plaso/parsers/sqlite.py new file mode 100644 index 0000000..40d3e53 --- /dev/null +++ b/plaso/parsers/sqlite.py @@ -0,0 +1,279 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a SQLite parser.""" + +import logging +import os +import tempfile + +import sqlite3 + +from plaso.lib import errors +from plaso.parsers import interface +from plaso.parsers import manager +from plaso.parsers import plugins + + +class SQLiteCache(plugins.BasePluginCache): + """A cache storing query results for SQLite plugins.""" + + def CacheQueryResults( + self, sql_results, attribute_name, key_name, values): + """Build a dict object based on a SQL command. + + This function will take a SQL command, execute it and for + each resulting row it will store a key in a dictionary. + + An example: + sql_results = A SQL result object after executing the + SQL command: 'SELECT foo, bla, bar FROM my_table' + attribute_name = 'all_the_things' + key_name = 'foo' + values = ['bla', 'bar'] + + Results from running this against the database: + 'first', 'stuff', 'things' + 'second', 'another stuff', 'another thing' + + This will result in a dict object being created in the + cache, called 'all_the_things' and it will contain the following value: + + all_the_things = { + 'first': ['stuff', 'things'], + 'second': ['another_stuff', 'another_thing']} + + Args: + sql_results: The SQL result object (sqlite.Cursor) after executing + a SQL command on the database. + attribute_name: The attribute name in the cache to store + results to. This will be the name of the + dict attribute. + key_name: The name of the result field that should be used + as a key in the resulting dict that is created. + values: A list of result fields that are stored as values + to the dict. If this list has only one value in it + the value will be stored directly, otherwise the value + will be a list containing the extracted results based + on the names provided in this list. + """ + setattr(self, attribute_name, {}) + attribute = getattr(self, attribute_name) + + row = sql_results.fetchone() + while row: + if len(values) == 1: + attribute[row[key_name]] = row[values[0]] + else: + attribute[row[key_name]] = [] + for value in values: + attribute[row[key_name]].append(row[value]) + + row = sql_results.fetchone() + + +class SQLiteDatabase(object): + """A simple wrapper for opening up a SQLite database.""" + + # Magic value for a SQLite database. + MAGIC = 'SQLite format 3' + + _READ_BUFFER_SIZE = 65536 + + def __init__(self, file_entry): + """Initializes the database object. + + Args: + file_entry: the file entry object. 
+ """ + self._cursor = None + self._database = None + self._file_entry = file_entry + self._open = False + self._tables = [] + self._temp_file_name = '' + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make usable with "with" statement.""" + self.Close() + + def __enter__(self): + """Make usable with "with" statement.""" + return self + + @property + def cursor(self): + """Returns a cursor object from the database.""" + if not self._open: + self.Open() + + return self._database.cursor() + + @property + def tables(self): + """Returns a list of all the tables in the database.""" + if not self._open: + self.Open() + + return self._tables + + def Close(self): + """Close the database connection and clean up the temporary file.""" + if not self._open: + return + + self._database.close() + + try: + os.remove(self._temp_file_name) + except (OSError, IOError) as exception: + logging.warning(( + u'Unable to remove temporary copy: {0:s} of SQLite database: {1:s} ' + u'with error: {2:s}').format( + self._temp_file_name, self._file_entry.name, exception)) + + self._tables = [] + self._database = None + self._temp_file_name = '' + self._open = False + + def Open(self): + """Opens up a database connection and build a list of table names.""" + file_object = self._file_entry.GetFileObject() + + # TODO: Remove this when the classifier gets implemented + # and used. As of now, there is no check made against the file + # to verify it's signature, thus all files are sent here, meaning + # that this method assumes everything is a SQLite file and starts + # copying the content of the file into memory, which is not good + # for very large files. + file_object.seek(0, os.SEEK_SET) + + data = file_object.read(len(self.MAGIC)) + + if data != self.MAGIC: + file_object.close() + raise IOError( + u'File {0:s} not a SQLite database. (invalid signature)'.format( + self._file_entry.name)) + + # TODO: Current design copies the entire file into a buffer + # that is parsed by each SQLite parser. This is not very efficient, + # especially when many SQLite parsers are ran against a relatively + # large SQLite database. This temporary file that is created should + # be usable by all SQLite parsers so the file should only be read + # once in memory and then deleted when all SQLite parsers have completed. + + # TODO: Change this into a proper implementation using APSW + # and virtual filesystems when that will be available. + # Info: http://apidoc.apsw.googlecode.com/hg/vfs.html#vfs and + # http://apidoc.apsw.googlecode.com/hg/example.html#example-vfs + # Until then, just copy the file into a tempfile and parse it. + + # Note that data is filled here with the file header data and + # that with will explicitly close the temporary files and thus + # making sure it is available for sqlite3.connect(). + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + self._temp_file_name = temp_file.name + while data: + temp_file.write(data) + data = file_object.read(self._READ_BUFFER_SIZE) + + self._database = sqlite3.connect(self._temp_file_name) + try: + self._database.row_factory = sqlite3.Row + self._cursor = self._database.cursor() + except sqlite3.DatabaseError as exception: + logging.debug( + u'Unable to parse SQLite database: {0:s} with error: {1:s}'.format( + self._file_entry.name, exception)) + raise + + # Verify the table by reading in all table names and compare it to + # the list of required tables. 
+ try: + sql_results = self._cursor.execute( + 'SELECT name FROM sqlite_master WHERE type="table"') + except sqlite3.DatabaseError as exception: + logging.debug( + u'Unable to parse SQLite database: {0:s} with error: {1:s}'.format( + self._file_entry.name, exception)) + raise + + self._tables = [] + for row in sql_results: + self._tables.append(row[0]) + + self._open = True + + +class SQLiteParser(interface.BasePluginsParser): + """A SQLite parser for Plaso.""" + + # Name of the parser, which enables all plugins by default. + NAME = 'sqlite' + DESCRIPTION = u'Parser for SQLite database files.' + + _plugin_classes = {} + + def __init__(self): + """Initializes a parser object.""" + super(SQLiteParser, self).__init__() + self._local_zone = False + self._plugins = SQLiteParser.GetPluginObjects() + self.db = None + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Parses an SQLite database. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Returns: + A event object generator (EventObjects) extracted from the database. + """ + with SQLiteDatabase(file_entry) as database: + try: + database.Open() + except IOError as exception: + raise errors.UnableToParseFile( + u'Unable to open database with error: {0:s}'.format( + repr(exception))) + except sqlite3.DatabaseError as exception: + raise errors.UnableToParseFile( + u'Unable to parse SQLite database with error: {0:s}.'.format( + repr(exception))) + + parser_chain = self._BuildParserChain(parser_chain) + # Create a cache in which the resulting tables are cached. + cache = SQLiteCache() + for plugin_object in self._plugins: + try: + plugin_object.Process( + parser_context, file_entry=file_entry, parser_chain=parser_chain, + cache=cache, database=database) + + except errors.WrongPlugin: + logging.debug( + u'Plugin: {0:s} cannot parse database: {1:s}'.format( + plugin_object.NAME, file_entry.name)) + + +manager.ParsersManager.RegisterParser(SQLiteParser) diff --git a/plaso/parsers/sqlite_plugins/__init__.py b/plaso/parsers/sqlite_plugins/__init__.py new file mode 100644 index 0000000..ad7d9ae --- /dev/null +++ b/plaso/parsers/sqlite_plugins/__init__.py @@ -0,0 +1,32 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
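+# Importing a plugin module below has the side effect of registering its
+# plugin class with the SQLite parser, via the RegisterPlugin call at the
+# bottom of each module.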
+"""This file contains an import statement for each SQLite plugin.""" + +from plaso.parsers.sqlite_plugins import android_calls +from plaso.parsers.sqlite_plugins import android_sms +from plaso.parsers.sqlite_plugins import appusage +from plaso.parsers.sqlite_plugins import chrome +from plaso.parsers.sqlite_plugins import chrome_cookies +from plaso.parsers.sqlite_plugins import chrome_extension_activity +from plaso.parsers.sqlite_plugins import firefox +from plaso.parsers.sqlite_plugins import firefox_cookies +from plaso.parsers.sqlite_plugins import gdrive +from plaso.parsers.sqlite_plugins import ls_quarantine +from plaso.parsers.sqlite_plugins import mac_document_versions +from plaso.parsers.sqlite_plugins import mackeeper_cache +from plaso.parsers.sqlite_plugins import skype +from plaso.parsers.sqlite_plugins import zeitgeist diff --git a/plaso/parsers/sqlite_plugins/android_calls.py b/plaso/parsers/sqlite_plugins/android_calls.py new file mode 100644 index 0000000..d8294b1 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/android_calls.py @@ -0,0 +1,111 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for the Android contacts2 Call History. + +Android Call History is stored in SQLite database files named contacts2.db. +""" + +from plaso.events import time_events +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +class AndroidCallEvent(time_events.JavaTimeEvent): + """Convenience class for an Android Call History event.""" + + DATA_TYPE = 'android:event:call' + + def __init__( + self, java_time, usage, identifier, number, name, duration, call_type): + """Initializes the event object. + + Args: + java_time: The Java time value. + usage: The description of the usage of the time value. + identifier: The row identifier. + number: The phone number associated to the remote party. + duration: The number of seconds the call lasted. + call_type: Incoming, Outgoing, or Missed. + """ + super(AndroidCallEvent, self).__init__(java_time, usage) + self.offset = identifier + self.number = number + self.name = name + self.duration = duration + self.call_type = call_type + + +class AndroidCallPlugin(interface.SQLitePlugin): + """Parse Android contacts2 database.""" + + NAME = 'android_calls' + DESCRIPTION = u'Parser for Android calls SQLite database files.' + + # Define the needed queries. + QUERIES = [ + ('SELECT _id AS id, date, number, name, duration, type FROM calls', + 'ParseCallsRow')] + + CALL_TYPE = { + 1: u'INCOMING', + 2: u'OUTGOING', + 3: u'MISSED'} + + def ParseCallsRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a Call record row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. 
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + # Extract and lookup the call type. + call_type = self.CALL_TYPE.get(row['type'], u'UNKNOWN') + + event_object = AndroidCallEvent( + row['date'], u'Call Started', row['id'], row['number'], row['name'], + row['duration'], call_type) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry, + query=query) + + duration = row['duration'] + if isinstance(duration, basestring): + try: + duration = int(duration, 10) + except ValueError: + duration = 0 + + if duration: + # The duration is in seconds and the date value in milli seconds. + duration *= 1000 + event_object = AndroidCallEvent( + row['date'] + duration, u'Call Ended', row['id'], row['number'], + row['name'], row['duration'], call_type) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(AndroidCallPlugin) diff --git a/plaso/parsers/sqlite_plugins/android_calls_test.py b/plaso/parsers/sqlite_plugins/android_calls_test.py new file mode 100644 index 0000000..dac6baf --- /dev/null +++ b/plaso/parsers/sqlite_plugins/android_calls_test.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Android SMS call history plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import android_calls as android_calls_formatter +from plaso.lib import timelib_test +from plaso.parsers.sqlite_plugins import android_calls +from plaso.parsers.sqlite_plugins import test_lib + + +class AndroidCallSQLitePluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Android Call History database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = android_calls.AndroidCallPlugin() + + def testProcess(self): + """Test the Process function on an Android contacts2.db file.""" + test_file = self._GetTestFilePath(['contacts2.db']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The contacts2 database file contains 5 events (MISSED/OUTGOING/INCOMING). + self.assertEquals(len(event_objects), 5) + + # Check the first event. 
+ event_object = event_objects[0] + + self.assertEquals(event_object.timestamp_desc, u'Call Started') + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-06 21:17:16.690') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_number = u'5404561685' + self.assertEquals(event_object.number, expected_number) + + expected_type = u'MISSED' + self.assertEquals(event_object.call_type, expected_type) + + expected_call = ( + u'MISSED ' + u'Number: 5404561685 ' + u'Name: Barney ' + u'Duration: 0 seconds') + expected_short = u'MISSED Call' + self._TestGetMessageStrings(event_object, expected_call, expected_short) + + # Run some tests on the last 2 events. + event_object_3 = event_objects[3] + event_object_4 = event_objects[4] + + # Check the timestamp_desc of the last event. + self.assertEquals(event_object_4.timestamp_desc, u'Call Ended') + + expected_timestamp3 = timelib_test.CopyStringToTimestamp( + '2013-11-07 00:03:36.690') + self.assertEquals(event_object_3.timestamp, expected_timestamp3) + + expected_timestamp4 = timelib_test.CopyStringToTimestamp( + '2013-11-07 00:14:15.690') + self.assertEquals(event_object_4.timestamp, expected_timestamp4) + + # Ensure the difference in btw. events 3 and 4 equals the duration. + expected_duration = ( + (expected_timestamp4 - expected_timestamp3) / 1000000) + self.assertEquals(event_object_4.duration, expected_duration) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/android_sms.py b/plaso/parsers/sqlite_plugins/android_sms.py new file mode 100644 index 0000000..b87824f --- /dev/null +++ b/plaso/parsers/sqlite_plugins/android_sms.py @@ -0,0 +1,100 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for the Android SMS database. + +Android SMS messages are stored in SQLite database files named mmssms.dbs. +""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +class AndroidSmsEvent(time_events.JavaTimeEvent): + """Convenience class for an Android SMS event.""" + + DATA_TYPE = 'android:messaging:sms' + + def __init__(self, java_time, identifier, address, sms_read, sms_type, body): + """Initializes the event object. + + Args: + java_time: The Java time value. + identifier: The row identifier. + address: The phone number associated to the sender/receiver. + status: Read or Unread. + type: Sent or Received. + body: Content of the SMS text message. 
+ """ + super(AndroidSmsEvent, self).__init__( + java_time, eventdata.EventTimestamp.CREATION_TIME) + self.offset = identifier + self.address = address + self.sms_read = sms_read + self.sms_type = sms_type + self.body = body + + +class AndroidSmsPlugin(interface.SQLitePlugin): + """Parse Android SMS database.""" + + NAME = 'android_sms' + DESCRIPTION = u'Parser for Android text messages SQLite database files.' + + # Define the needed queries. + QUERIES = [ + ('SELECT _id AS id, address, date, read, type, body FROM sms', + 'ParseSmsRow')] + + # The required tables. + REQUIRED_TABLES = frozenset(['sms']) + + SMS_TYPE = { + 1: u'RECEIVED', + 2: u'SENT'} + SMS_READ = { + 0: u'UNREAD', + 1: u'READ'} + + def ParseSmsRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses an SMS row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + # Extract and lookup the SMS type and read status. + sms_type = self.SMS_TYPE.get(row['type'], u'UNKNOWN') + sms_read = self.SMS_READ.get(row['read'], u'UNKNOWN') + + event_object = AndroidSmsEvent( + row['date'], row['id'], row['address'], sms_read, sms_type, row['body']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(AndroidSmsPlugin) diff --git a/plaso/parsers/sqlite_plugins/android_sms_test.py b/plaso/parsers/sqlite_plugins/android_sms_test.py new file mode 100644 index 0000000..44f727d --- /dev/null +++ b/plaso/parsers/sqlite_plugins/android_sms_test.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Android SMS plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import android_sms as android_sms_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers.sqlite_plugins import android_sms +from plaso.parsers.sqlite_plugins import test_lib + + +class AndroidSmsTest(test_lib.SQLitePluginTestCase): + """Tests for the Android SMS database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = android_sms.AndroidSmsPlugin() + + def testProcess(self): + """Test the Process function on an Android SMS mmssms.db file.""" + test_file = self._GetTestFilePath(['mmssms.db']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The SMS database file contains 9 events (5 SENT, 4 RECEIVED messages). + self.assertEquals(len(event_objects), 9) + + # Check the first SMS sent. + event_object = event_objects[0] + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-10-29 16:56:28.038') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_address = u'1 555-521-5554' + self.assertEquals(event_object.address, expected_address) + + expected_body = u'Yo Fred this is my new number.' + self.assertEquals(event_object.body, expected_body) + + expected_msg = ( + u'Type: SENT ' + u'Address: 1 555-521-5554 ' + u'Status: READ ' + u'Message: Yo Fred this is my new number.') + expected_short = u'Yo Fred this is my new number.' + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/appusage.py b/plaso/parsers/sqlite_plugins/appusage.py new file mode 100644 index 0000000..fa53c30 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/appusage.py @@ -0,0 +1,108 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for the Mac OS X application usage. + + The application usage is stored in SQLite database files named + /var/db/application_usage.sqlite +""" + +from plaso.events import time_events +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +class MacOSXApplicationUsageEvent(time_events.PosixTimeEvent): + """Convenience class for a Mac OS X application usage event.""" + + DATA_TYPE = 'macosx:application_usage' + + def __init__( + self, posix_time, usage, application_name, application_version, + bundle_id, number_of_times): + """Initializes the event object. + + Args: + posix_time: The POSIX time value. + usage: The description of the usage of the time value. + application_name: The name of the application. 
+ application_version: The version of the application. + bundle_id: The bundle identifier of the application. + number_of_times: TODO: number of times what? + """ + super(MacOSXApplicationUsageEvent, self).__init__(posix_time, usage) + + self.application = application_name + self.app_version = application_version + self.bundle_id = bundle_id + self.count = number_of_times + + +class ApplicationUsagePlugin(interface.SQLitePlugin): + """Parse Application Usage history files. + + Application usage is a SQLite database that logs down entries + triggered by NSWorkspaceWillLaunchApplicationNotification and + NSWorkspaceDidTerminateApplicationNotification NSWorkspace notifications by + crankd. + + See the code here: + http://code.google.com/p/google-macops/source/browse/trunk/crankd/\ + ApplicationUsage.py + + Default installation: /var/db/application_usage.sqlite + """ + + NAME = 'appusage' + DESCRIPTION = u'Parser for Mac OS X application usage SQLite database files.' + + # Define the needed queries. + QUERIES = [( + ('SELECT last_time, event, bundle_id, app_version, app_path, ' + 'number_times FROM application_usage ORDER BY last_time'), + 'ParseApplicationUsageRow')] + + # The required tables. + REQUIRED_TABLES = frozenset(['application_usage']) + + def ParseApplicationUsageRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses an application usage row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + # TODO: replace usage by definition(s) in eventdata. Not sure which values + # it will hold here. + usage = u'Application {0:s}'.format(row['event']) + + event_object = MacOSXApplicationUsageEvent( + row['last_time'], usage, row['app_path'], row['app_version'], + row['bundle_id'], row['number_times']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(ApplicationUsagePlugin) diff --git a/plaso/parsers/sqlite_plugins/appusage_test.py b/plaso/parsers/sqlite_plugins/appusage_test.py new file mode 100644 index 0000000..e714238 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/appusage_test.py @@ -0,0 +1,69 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Mac OS X application usage database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import appusage as appusage_formatter +from plaso.lib import timelib_test +from plaso.parsers.sqlite_plugins import test_lib +from plaso.parsers.sqlite_plugins import appusage + + +class ApplicationUsagePluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Mac OS X application usage activity database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = appusage.ApplicationUsagePlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['application_usage.sqlite']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The sqlite database contains 5 events. + self.assertEquals(len(event_objects), 5) + + # Check the first event. + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-05-07 18:52:02') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.application, u'/Applications/Safari.app') + self.assertEquals(event_object.app_version, u'9537.75.14') + self.assertEquals(event_object.bundle_id, u'com.apple.Safari') + self.assertEquals(event_object.count, 1) + + expected_msg = ( + u'/Applications/Safari.app v.9537.75.14 ' + u'(bundle: com.apple.Safari). ' + u'Launched: 1 time(s)') + + expected_msg_short = u'/Applications/Safari.app (1 time(s))' + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/chrome.py b/plaso/parsers/sqlite_plugins/chrome.py new file mode 100644 index 0000000..04abc82 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/chrome.py @@ -0,0 +1,337 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for the Google Chrome History files. + + The Chrome History is stored in SQLite database files named History + and Archived History. Where the Archived History does not contain + the downloads table. +""" + +from plaso.events import time_events +from plaso.lib import timelib +from plaso.lib import eventdata +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +class ChromeHistoryFileDownloadedEvent(time_events.TimestampEvent): + """Convenience class for a Chrome History file downloaded event.""" + DATA_TYPE = 'chrome:history:file_downloaded' + + def __init__( + self, timestamp, row_id, url, full_path, received_bytes, total_bytes): + """Initializes the event object. + + Args: + timestamp: The timestamp value. + row_id: The identifier of the corresponding row. + url: The URL of the downloaded file. 
+      full_path: The full path where the file was downloaded to.
+      received_bytes: The number of bytes received while downloading.
+      total_bytes: The total number of bytes to download.
+    """
+    super(ChromeHistoryFileDownloadedEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.FILE_DOWNLOADED)
+
+    self.offset = row_id
+    self.url = url
+    self.full_path = full_path
+    self.received_bytes = received_bytes
+    self.total_bytes = total_bytes
+
+
+class ChromeHistoryPageVisitedEvent(time_events.WebKitTimeEvent):
+  """Convenience class for a Chrome History page visited event."""
+  DATA_TYPE = 'chrome:history:page_visited'
+
+  # TODO: refactor extra to be conditional arguments.
+  def __init__(
+      self, webkit_time, row_id, url, title, hostname, typed_count, from_visit,
+      extra, visit_source):
+    """Initializes the event object.
+
+    Args:
+      webkit_time: The WebKit time value.
+      row_id: The identifier of the corresponding row.
+      url: The URL of the visited page.
+      title: The title of the visited page.
+      hostname: The visited hostname.
+      typed_count: The number of times the URL was typed, as opposed to
+                   reached via a link.
+      from_visit: The URL where the visit originated from.
+      extra: String containing extra event data.
+      visit_source: The source of the page visit, if defined.
+    """
+    super(ChromeHistoryPageVisitedEvent, self).__init__(
+        webkit_time, eventdata.EventTimestamp.PAGE_VISITED)
+
+    self.offset = row_id
+    self.url = url
+    self.title = title
+    self.host = hostname
+    self.typed_count = typed_count
+    self.from_visit = from_visit
+    self.extra = extra
+    if visit_source is not None:
+      self.visit_source = visit_source
+
+
+class ChromeHistoryPlugin(interface.SQLitePlugin):
+  """Parse Chrome Archived History and History files."""
+
+  NAME = 'chrome_history'
+  DESCRIPTION = u'Parser for Chrome history SQLite database files.'
+
+  # Define the needed queries.
+  QUERIES = [
+      (('SELECT urls.id, urls.url, urls.title, urls.visit_count, '
+        'urls.typed_count, urls.last_visit_time, urls.hidden, visits.'
+        'visit_time, visits.from_visit, visits.transition, visits.id '
+        'AS visit_id FROM urls, visits WHERE urls.id = visits.url ORDER '
+        'BY visits.visit_time'), 'ParseLastVisitedRow'),
+      (('SELECT downloads.id AS id, downloads.start_time,'
+        'downloads.target_path, downloads_url_chains.url, '
+        'downloads.received_bytes, downloads.total_bytes FROM downloads,'
+        ' downloads_url_chains WHERE downloads.id = '
+        'downloads_url_chains.id'), 'ParseNewFileDownloadedRow'),
+      (('SELECT id, full_path, url, start_time, received_bytes, '
+        'total_bytes,state FROM downloads'), 'ParseFileDownloadedRow')]
+
+  # The required tables common to Archived History and History.
+  REQUIRED_TABLES = frozenset([
+      'keyword_search_terms', 'meta', 'urls', 'visits', 'visit_source'])
+
+  # Queries for cache building.
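+  # These queries are executed once per database and their results are
+  # stored in the SQLiteCache (see _GetUrl and _GetVisitSource below), so
+  # resolving the originating URL and visit source of a row does not
+  # require a separate query per event. Roughly:
+  #
+  #   cache.CacheQueryResults(result_set, 'url', 'id', ('url', 'title'))
+  #   reference_url, reference_title = cache.GetResults('url')[from_visit_id]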
+ URL_CACHE_QUERY = ( + 'SELECT visits.id AS id, urls.url, urls.title FROM ' + 'visits, urls WHERE urls.id = visits.url') + SYNC_CACHE_QUERY = 'SELECT id, source FROM visit_source' + + # The following definition for values can be found here: + # http://src.chromium.org/svn/trunk/src/content/public/common/ \ + # page_transition_types_list.h + PAGE_TRANSITION = { + 0: u'LINK', + 1: u'TYPED', + 2: u'AUTO_BOOKMARK', + 3: u'AUTO_SUBFRAME', + 4: u'MANUAL_SUBFRAME', + 5: u'GENERATED', + 6: u'START_PAGE', + 7: u'FORM_SUBMIT', + 8: u'RELOAD', + 9: u'KEYWORD', + 10: u'KEYWORD_GENERATED ' + } + + TRANSITION_LONGER = { + 0: u'User clicked a link', + 1: u'User typed the URL in the URL bar', + 2: u'Got through a suggestion in the UI', + 3: (u'Content automatically loaded in a non-toplevel frame - user may ' + u'not realize'), + 4: u'Subframe explicitly requested by the user', + 5: (u'User typed in the URL bar and selected an entry from the list - ' + u'such as a search bar'), + 6: u'The start page of the browser', + 7: u'A form the user has submitted values to', + 8: (u'The user reloaded the page, eg by hitting the reload button or ' + u'restored a session'), + 9: (u'URL what was generated from a replaceable keyword other than the ' + u'default search provider'), + 10: u'Corresponds to a visit generated from a KEYWORD' + } + + # The following is the values for the source enum found in the visit_source + # table and describes where a record originated from (if it originates from a + # different storage than locally generated). + # The source can be found here: + # http://src.chromium.org/svn/trunk/src/chrome/browser/history/\ + # history_types.h + VISIT_SOURCE = { + 0: u'SOURCE_SYNCED', + 1: u'SOURCE_BROWSED', + 2: u'SOURCE_EXTENSION', + 3: u'SOURCE_FIREFOX_IMPORTED', + 4: u'SOURCE_IE_IMPORTED', + 5: u'SOURCE_SAFARI_IMPORTED' + } + + CORE_MASK = 0xff + + def _GetHostname(self, hostname): + """Return a hostname from a full URL.""" + if hostname.startswith('http') or hostname.startswith('ftp'): + _, _, uri = hostname.partition('//') + hostname, _, _ = uri.partition('/') + + return hostname + + if hostname.startswith('about') or hostname.startswith('chrome'): + site, _, _ = hostname.partition('/') + return site + + return hostname + + def _GetUrl(self, url, cache, database): + """Return an URL from a reference to an entry in the from_visit table.""" + if not url: + return u'' + + url_cache_results = cache.GetResults('url') + if not url_cache_results: + cursor = database.cursor + result_set = cursor.execute(self.URL_CACHE_QUERY) + cache.CacheQueryResults( + result_set, 'url', 'id', ('url', 'title')) + url_cache_results = cache.GetResults('url') + + reference_url, reference_title = url_cache_results.get(url, [u'', u'']) + + if not reference_url: + return u'' + + return u'{0:s} ({1:s})'.format(reference_url, reference_title) + + def _GetVisitSource(self, visit_id, cache, database): + """Return a string denoting the visit source type if possible. + + Args: + visit_id: The ID from the visits table for the particular record. + cache: A cache object (instance of SQLiteCache). + database: A database object (instance of SQLiteDatabase). + + Returns: + A string with the visit source, None if not found. 
+ """ + if not visit_id: + return + + sync_cache_results = cache.GetResults('sync') + if not sync_cache_results: + cursor = database.cursor + result_set = cursor.execute(self.SYNC_CACHE_QUERY) + cache.CacheQueryResults( + result_set, 'sync', 'id', ('source',)) + sync_cache_results = cache.GetResults('sync') + + results = sync_cache_results.get(visit_id, None) + if results is None: + return + + return self.VISIT_SOURCE.get(results, None) + + def ParseFileDownloadedRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a file downloaded row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + timestamp = timelib.Timestamp.FromPosixTime(row['start_time']) + event_object = ChromeHistoryFileDownloadedEvent( + timestamp, row['id'], row['url'], row['full_path'], + row['received_bytes'], row['total_bytes']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + def ParseNewFileDownloadedRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a file downloaded row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + timestamp = timelib.Timestamp.FromWebKitTime(row['start_time']) + event_object = ChromeHistoryFileDownloadedEvent( + timestamp, row['id'], row['url'], row['target_path'], + row['received_bytes'], row['total_bytes']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + def ParseLastVisitedRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + cache=None, database=None, + **unused_kwargs): + """Parses a last visited row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + cache: Optional cache object (instance of SQLiteCache). + The default is None. + database: Optional database object (instance of SQLiteDatabase). + The default is None. + """ + extras = [] + + transition_nr = row['transition'] & self.CORE_MASK + page_transition = self.PAGE_TRANSITION.get(transition_nr, '') + if page_transition: + extras.append(u'Type: [{0:s} - {1:s}]'.format( + page_transition, self.TRANSITION_LONGER.get(transition_nr, ''))) + + if row['hidden'] == '1': + extras.append(u'(url hidden)') + + # TODO: move to formatter. 
+ count = row['typed_count'] + if count >= 1: + if count > 1: + multi = u's' + else: + multi = u'' + + extras.append(u'(type count {1:d} time{0:s})'.format(multi, count)) + else: + extras.append(u'(URL not typed directly - no typed count)') + + visit_source = self._GetVisitSource(row['visit_id'], cache, database) + + # TODO: replace extras by conditional formatting. + event_object = ChromeHistoryPageVisitedEvent( + row['visit_time'], row['id'], row['url'], row['title'], + self._GetHostname(row['url']), row['typed_count'], + self._GetUrl(row['from_visit'], cache, database), u' '.join(extras), + visit_source) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(ChromeHistoryPlugin) diff --git a/plaso/parsers/sqlite_plugins/chrome_cookies.py b/plaso/parsers/sqlite_plugins/chrome_cookies.py new file mode 100644 index 0000000..481d7a9 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/chrome_cookies.py @@ -0,0 +1,166 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for the Google Chrome Cookie database.""" + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +# Register the cookie plugins. +from plaso.parsers import cookie_plugins # pylint: disable=unused-import +from plaso.parsers import sqlite +from plaso.parsers.cookie_plugins import interface as cookie_interface +from plaso.parsers.sqlite_plugins import interface + + +class ChromeCookieEvent(time_events.WebKitTimeEvent): + """Convenience class for a Chrome Cookie event.""" + + DATA_TYPE = 'chrome:cookie:entry' + + def __init__( + self, timestamp, usage, hostname, cookie_name, value, path, secure, + httponly, persistent): + """Initializes the event. + + Args: + timestamp: The timestamp value in WebKit format.. + usage: Timestamp description string. + hostname: The hostname of host that set the cookie value. + cookie_name: The name field of the cookie. + value: The value of the cookie. + path: An URI of the page that set the cookie. + secure: Indication if this cookie should only be transmitted over a secure + channel. + httponly: An indication that the cookie cannot be accessed through client + side script. + persistent: A flag indicating cookies persistent value. 
+    """
+    super(ChromeCookieEvent, self).__init__(timestamp, usage)
+    if hostname.startswith('.'):
+      hostname = hostname[1:]
+
+    self.host = hostname
+    self.cookie_name = cookie_name
+    self.data = value
+    self.path = path
+    self.secure = True if secure else False
+    self.httponly = True if httponly else False
+    self.persistent = True if persistent else False
+
+    if self.secure:
+      scheme = u'https'
+    else:
+      scheme = u'http'
+
+    self.url = u'{0:s}://{1:s}{2:s}'.format(scheme, hostname, path)
+
+
+class ChromeCookiePlugin(interface.SQLitePlugin):
+  """Parse Chrome Cookies file."""
+
+  NAME = 'chrome_cookies'
+  DESCRIPTION = u'Parser for Chrome cookies SQLite database files.'
+
+  # Define the needed queries.
+  QUERIES = [
+      (('SELECT creation_utc, host_key, name, value, path, expires_utc, '
+        'secure, httponly, last_access_utc, has_expires, persistent '
+        'FROM cookies'), 'ParseCookieRow')]
+
+  # The required tables.
+  REQUIRED_TABLES = frozenset(['cookies', 'meta'])
+
+  # Point to a few sources for URL information.
+  URLS = [
+      u'http://src.chromium.org/svn/trunk/src/net/cookies/',
+      (u'http://www.dfinews.com/articles/2012/02/'
+       u'google-analytics-cookies-and-forensic-implications')]
+
+  # Google Analytics __utmz variable translation.
+  # Taken from:
+  # http://www.dfinews.com/sites/dfinews.com/files/u739/Tab2Cookies020312.jpg
+  GA_UTMZ_TRANSLATION = {
+      'utmcsr': 'Last source used to access.',
+      'utmccn': 'Ad campaign information.',
+      'utmcmd': 'Last type of visit.',
+      'utmctr': 'Keywords used to find site.',
+      'utmcct': 'Path to the page of referring link.'}
+
+  def __init__(self):
+    """Initializes a plugin object."""
+    super(ChromeCookiePlugin, self).__init__()
+    self._cookie_plugins = cookie_interface.GetPlugins()
+
+  def ParseCookieRow(
+      self, parser_context, row, file_entry=None, parser_chain=None,
+      query=None, **unused_kwargs):
+    """Parses a cookie row.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: The row resulting from the query.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      query: Optional query string. The default is None.
+    """
+    event_object = ChromeCookieEvent(
+        row['creation_utc'], eventdata.EventTimestamp.CREATION_TIME,
+        row['host_key'], row['name'], row['value'], row['path'], row['secure'],
+        row['httponly'], row['persistent'])
+    parser_context.ProduceEvent(
+        event_object, query=query, parser_chain=parser_chain,
+        file_entry=file_entry)
+
+    event_object = ChromeCookieEvent(
+        row['last_access_utc'], eventdata.EventTimestamp.ACCESS_TIME,
+        row['host_key'], row['name'], row['value'], row['path'], row['secure'],
+        row['httponly'], row['persistent'])
+    parser_context.ProduceEvent(
+        event_object, query=query, parser_chain=parser_chain,
+        file_entry=file_entry)
+
+    if row['has_expires']:
+      event_object = ChromeCookieEvent(
+          row['expires_utc'], 'Cookie Expires',
+          row['host_key'], row['name'], row['value'], row['path'],
+          row['secure'], row['httponly'], row['persistent'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    # Go through all the cookie plugins to see if any specific parsing is
+    # needed.
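+    # Every registered cookie plugin is offered each cookie; a plugin that
+    # does not handle a cookie raises errors.WrongPlugin, which the dispatch
+    # loop below silently skips. As a minimal sketch (hypothetical plugin and
+    # cookie names; the real interface lives in
+    # plaso.parsers.cookie_plugins.interface), a cookie plugin follows this
+    # shape:
+    #
+    #   class ExampleCookiePlugin(object):
+    #     def Process(self, parser_context, cookie_name=None,
+    #                 cookie_data=None, url=None, **kwargs):
+    #       if cookie_name != u'example':
+    #         raise errors.WrongPlugin(u'Not an example cookie.')
+    #       # ... produce event objects from cookie_data ...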
+    hostname = row['host_key']
+    if hostname.startswith('.'):
+      hostname = hostname[1:]
+
+    url = u'http{0:s}://{1:s}{2:s}'.format(
+        u's' if row['secure'] else u'', hostname, row['path'])
+
+    for cookie_plugin in self._cookie_plugins:
+      try:
+        cookie_plugin.Process(
+            parser_context, cookie_name=row['name'], cookie_data=row['value'],
+            url=url, parser_chain=parser_chain, file_entry=file_entry)
+      except errors.WrongPlugin:
+        pass
+
+
+sqlite.SQLiteParser.RegisterPlugin(ChromeCookiePlugin)
diff --git a/plaso/parsers/sqlite_plugins/chrome_cookies_test.py b/plaso/parsers/sqlite_plugins/chrome_cookies_test.py
new file mode 100644
index 0000000..8fd0b38
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/chrome_cookies_test.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Google Chrome cookie database plugin."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import chrome_cookies as chrome_cookies_formatter
+from plaso.lib import eventdata
+from plaso.lib import timelib_test
+from plaso.parsers.sqlite_plugins import chrome_cookies
+from plaso.parsers.sqlite_plugins import test_lib
+
+
+class ChromeCookiesPluginTest(test_lib.SQLitePluginTestCase):
+  """Tests for the Google Chrome cookie database plugin."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._plugin = chrome_cookies.ChromeCookiePlugin()
+
+  def testProcess(self):
+    """Tests the Process function on a Chrome cookie database file."""
+    test_file = self._GetTestFilePath(['cookies.db'])
+    event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+        self._plugin, test_file)
+
+    event_objects = []
+    extra_objects = []
+
+    # Since we have events generated by both the generic cookie plugins and
+    # the Chrome cookie plugin, we need to separate them.
+    for event_object in self._GetEventObjectsFromQueue(event_queue_consumer):
+      if isinstance(event_object, chrome_cookies.ChromeCookieEvent):
+        event_objects.append(event_object)
+      else:
+        extra_objects.append(event_object)
+
+    # The cookie database contains 560 entries:
+    #   560 creation timestamps.
+    #   560 last access timestamps.
+    #   560 expiration timestamps.
+    # Then there are extra events created by plugins:
+    #   75 events created by Google Analytics cookies.
+    # In total: 1755 events.
+    self.assertEquals(len(event_objects), 3 * 560)
+
+    # Double check that we've got at least the 75 Google Analytics sessions.
+    self.assertGreaterEqual(len(extra_objects), 75)
+
+    # Check a few "random" events to verify.
+
+    # Check one LinkedIn cookie.
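+    # The fixed indices used in the checks below rely on the extraction order
+    # being stable for this test database.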
+ event_object = event_objects[124] + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.ACCESS_TIME) + self.assertEquals(event_object.host, u'www.linkedin.com') + self.assertEquals(event_object.cookie_name, u'leo_auth_token') + self.assertFalse(event_object.httponly) + self.assertEquals(event_object.url, u'http://www.linkedin.com/') + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-08-25 21:50:27.292367') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'http://www.linkedin.com/ (leo_auth_token) Flags: [HTTP only] = False ' + u'[Persistent] = True') + expected_short = u'www.linkedin.com (leo_auth_token)' + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + # Check one of the visits to rubiconproject.com. + event_object = event_objects[379] + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.ACCESS_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-01 13:54:34.949210') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.url, u'http://rubiconproject.com/') + self.assertEquals(event_object.path, u'/') + self.assertFalse(event_object.secure) + self.assertTrue(event_object.persistent) + + expected_msg = ( + u'http://rubiconproject.com/ (put_2249) Flags: [HTTP only] = False ' + u'[Persistent] = True') + self._TestGetMessageStrings( + event_object, expected_msg, u'rubiconproject.com (put_2249)') + + # Examine an event for a visit to a political blog site. + event_object = event_objects[444] + self.assertEquals( + event_object.path, + u'/2012/03/21/romney-tries-to-clean-up-etch-a-sketch-mess/') + self.assertEquals(event_object.host, u'politicalticker.blogs.cnn.com') + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-22 01:47:21.012022') + self.assertEquals(event_object.timestamp, expected_timestamp) + + # Examine a cookie that has an autologin entry. + event_object = event_objects[1425] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-01 13:52:56.189444') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.host, u'marvel.com') + self.assertEquals(event_object.cookie_name, u'autologin[timeout]') + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + # This particular cookie value represents a timeout value that corresponds + # to the expiration date of the cookie. + self.assertEquals(event_object.data, u'1364824322') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/chrome_extension_activity.py b/plaso/parsers/sqlite_plugins/chrome_extension_activity.py new file mode 100644 index 0000000..53e1feb --- /dev/null +++ b/plaso/parsers/sqlite_plugins/chrome_extension_activity.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for the Google Chrome extension activity database files.
+
+The Chrome extension activity is stored in SQLite database files named
+Extension Activity.
+"""
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.parsers import sqlite
+from plaso.parsers.sqlite_plugins import interface
+
+
+class ChromeExtensionActivityEvent(time_events.WebKitTimeEvent):
+  """Convenience class for a Chrome Extension Activity event."""
+
+  DATA_TYPE = 'chrome:extension_activity:activity_log'
+
+  def __init__(self, row):
+    """Initializes the event object.
+
+    Args:
+      row: The row resulting from the query (instance of sqlite3.Row).
+    """
+    # TODO: change the timestamp usage from unknown to something else.
+    super(ChromeExtensionActivityEvent, self).__init__(
+        row['time'], eventdata.EventTimestamp.UNKNOWN)
+
+    self.extension_id = row['extension_id']
+    self.action_type = row['action_type']
+    self.api_name = row['api_name']
+    self.args = row['args']
+    self.page_url = row['page_url']
+    self.page_title = row['page_title']
+    self.arg_url = row['arg_url']
+    self.other = row['other']
+    self.activity_id = row['activity_id']
+
+
+class ChromeExtensionActivityPlugin(interface.SQLitePlugin):
+  """Plugin to parse Chrome extension activity database files."""
+
+  NAME = 'chrome_extension_activity'
+  DESCRIPTION = u'Parser for Chrome extension activity SQLite database files.'
+
+  # Define the needed queries.
+  QUERIES = [
+      (('SELECT time, extension_id, action_type, api_name, args, page_url, '
+        'page_title, arg_url, other, activity_id '
+        'FROM activitylog_uncompressed ORDER BY time'),
+       'ParseActivityLogUncompressedRow')]
+
+  REQUIRED_TABLES = frozenset([
+      'activitylog_compressed', 'string_ids', 'url_ids'])
+
+  def ParseActivityLogUncompressedRow(
+      self, parser_context, row, file_entry=None, parser_chain=None,
+      query=None, **unused_kwargs):
+    """Parses an activity log row.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: The row resulting from the query (instance of sqlite3.Row).
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      query: Optional query string. The default is None.
+    """
+    event_object = ChromeExtensionActivityEvent(row)
+    parser_context.ProduceEvent(
+        event_object, query=query, parser_chain=parser_chain,
+        file_entry=file_entry)
+
+
+sqlite.SQLiteParser.RegisterPlugin(ChromeExtensionActivityPlugin)
diff --git a/plaso/parsers/sqlite_plugins/chrome_extension_activity_test.py b/plaso/parsers/sqlite_plugins/chrome_extension_activity_test.py
new file mode 100644
index 0000000..8c807e6
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/chrome_extension_activity_test.py
@@ -0,0 +1,76 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Google Chrome extension activity database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import chrome_extension_activity as chrome_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import chrome_extension_activity +from plaso.parsers.sqlite_plugins import test_lib + + +class ChromeExtensionActivityPluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Google Chrome extension activity database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = chrome_extension_activity.ChromeExtensionActivityPlugin() + + def testProcess(self): + """Tests the Process function on a Chrome extension activity database.""" + test_file = self._GetTestFilePath(['Extension Activity']) + cache = sqlite.SQLiteCache() + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file, cache) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 56) + + event_object = event_objects[0] + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.UNKNOWN) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-11-25 21:08:23.698737') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_extension_id = u'ognampngfcbddbfemdapefohjiobgbdl' + self.assertEquals(event_object.extension_id, expected_extension_id) + + self.assertEquals(event_object.action_type, 1) + self.assertEquals(event_object.activity_id, 48) + self.assertEquals(event_object.api_name, u'browserAction.onClicked') + + expected_msg = ( + u'Chrome extension: ognampngfcbddbfemdapefohjiobgbdl ' + u'Action type: 1 ' + u'Activity identifier: 48 ' + u'API name: browserAction.onClicked') + expected_short = ( + u'ognampngfcbddbfemdapefohjiobgbdl browserAction.onClicked') + + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/chrome_test.py b/plaso/parsers/sqlite_plugins/chrome_test.py new file mode 100644 index 0000000..a473889 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/chrome_test.py @@ -0,0 +1,102 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Google Chrome History database plugin."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import chrome as chrome_formatter
+from plaso.lib import eventdata
+from plaso.lib import timelib_test
+from plaso.parsers import sqlite
+from plaso.parsers.sqlite_plugins import chrome
+from plaso.parsers.sqlite_plugins import test_lib
+
+
+class ChromeHistoryPluginTest(test_lib.SQLitePluginTestCase):
+  """Tests for the Google Chrome History database plugin."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._plugin = chrome.ChromeHistoryPlugin()
+
+  def testProcess(self):
+    """Tests the Process function on a Chrome History database file."""
+    test_file = self._GetTestFilePath(['History'])
+    cache = sqlite.SQLiteCache()
+    event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+        self._plugin, test_file, cache)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    # The History file contains 71 events (69 page visits, 2 file downloads).
+    self.assertEquals(len(event_objects), 71)
+
+    # Check the first page visited entry.
+    event_object = event_objects[0]
+
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.PAGE_VISITED)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2011-04-07 12:03:11')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_url = u'http://start.ubuntu.com/10.04/Google/'
+    self.assertEquals(event_object.url, expected_url)
+
+    expected_title = u'Ubuntu Start Page'
+    self.assertEquals(event_object.title, expected_title)
+
+    expected_msg = (
+        u'{0:s} ({1:s}) [count: 0] Host: start.ubuntu.com '
+        u'Visit Source: [SOURCE_FIREFOX_IMPORTED] Type: [LINK - User clicked '
+        u'a link] (URL not typed directly - no typed count)').format(
+            expected_url, expected_title)
+    expected_short = u'{0:s} ({1:s})'.format(expected_url, expected_title)
+
+    self._TestGetMessageStrings(event_object, expected_msg, expected_short)
+
+    # Check the first file downloaded entry.
+    event_object = event_objects[69]
+
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.FILE_DOWNLOADED)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2011-05-23 08:35:30')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_url = (
+        u'http://fatloss4idiotsx.com/download/funcats/'
+        u'funcats_scr.exe')
+    self.assertEquals(event_object.url, expected_url)
+
+    expected_full_path = u'/home/john/Downloads/funcats_scr.exe'
+    self.assertEquals(event_object.full_path, expected_full_path)
+
+    expected_msg = (
+        u'{0:s} ({1:s}). Received: 1132155 bytes out of: '
+        u'1132155 bytes.').format(expected_url, expected_full_path)
+    expected_short = u'{0:s} downloaded (1132155 bytes)'.format(
+        expected_full_path)
+    self._TestGetMessageStrings(event_object, expected_msg, expected_short)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/sqlite_plugins/firefox.py b/plaso/parsers/sqlite_plugins/firefox.py
new file mode 100644
index 0000000..558faed
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/firefox.py
@@ -0,0 +1,476 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a parser for the Mozilla Firefox history."""
+
+import sqlite3
+
+from plaso.events import time_events
+from plaso.lib import event
+from plaso.lib import eventdata
+from plaso.parsers import sqlite
+from plaso.parsers.sqlite_plugins import interface
+
+
+# Check the SQLite version, bail out early if too old.
+if sqlite3.sqlite_version_info < (3, 7, 8):
+  raise ImportWarning(
+      'FirefoxHistoryParser requires at least SQLite version 3.7.8.')
+
+
+class FirefoxPlacesBookmarkAnnotation(time_events.TimestampEvent):
+  """Convenience class for a Firefox bookmark annotation event."""
+
+  DATA_TYPE = 'firefox:places:bookmark_annotation'
+
+  def __init__(self, timestamp, usage, row_id, title, url, content):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp value.
+      usage: Timestamp description string.
+      row_id: The identifier of the corresponding row.
+      title: The title of the bookmark folder.
+      url: The bookmarked URL.
+      content: The content of the annotation.
+    """
+    super(FirefoxPlacesBookmarkAnnotation, self).__init__(
+        timestamp, usage)
+
+    self.offset = row_id
+    self.title = title
+    self.url = url
+    self.content = content
+
+
+class FirefoxPlacesBookmarkFolder(time_events.TimestampEvent):
+  """Convenience class for a Firefox bookmark folder event."""
+
+  DATA_TYPE = 'firefox:places:bookmark_folder'
+
+  def __init__(self, timestamp, usage, row_id, title):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp value.
+      usage: Timestamp description string.
+      row_id: The identifier of the corresponding row.
+      title: The title of the bookmark folder.
+    """
+    super(FirefoxPlacesBookmarkFolder, self).__init__(
+        timestamp, usage)
+
+    self.offset = row_id
+    self.title = title
+
+
+class FirefoxPlacesBookmark(time_events.TimestampEvent):
+  """Convenience class for a Firefox bookmark event."""
+
+  DATA_TYPE = 'firefox:places:bookmark'
+
+  # TODO: move to formatter.
+  _TYPES = {
+      1: 'URL',
+      2: 'Folder',
+      3: 'Separator',
+  }
+
+  # pylint: disable=redefined-builtin
+  def __init__(self, timestamp, usage, row_id, type, title, url, places_title,
+               hostname, visit_count):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp value.
+      usage: Timestamp description string.
+      row_id: The identifier of the corresponding row.
+      type: Integer value containing the bookmark type.
+      title: The title of the bookmark folder.
+      url: The bookmarked URL.
+      places_title: The places title.
+      hostname: The hostname.
+      visit_count: The visit count.
+    """
+    super(FirefoxPlacesBookmark, self).__init__(timestamp, usage)
+
+    self.offset = row_id
+    self.type = self._TYPES.get(type, 'N/A')
+    self.title = title
+    self.url = url
+    self.places_title = places_title
+    self.host = hostname
+    self.visit_count = visit_count
+
+
+class FirefoxPlacesPageVisitedEvent(event.EventObject):
+  """Convenience class for a Firefox page visited event."""
+
+  DATA_TYPE = 'firefox:places:page_visited'
+
+  def __init__(self, timestamp, row_id, url, title, hostname, visit_count,
+               visit_type, extra):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp value. The timestamp contains the number of
+                 microseconds since Jan 1, 1970 00:00:00 UTC.
+      row_id: The identifier of the corresponding row.
+      url: The URL of the visited page.
+      title: The title of the visited page.
+      hostname: The visited hostname.
+      visit_count: The visit count.
+      visit_type: The transition type for the event.
+      extra: A list containing extra event data (TODO refactor).
+    """
+    super(FirefoxPlacesPageVisitedEvent, self).__init__()
+
+    self.timestamp = timestamp
+    self.timestamp_desc = eventdata.EventTimestamp.PAGE_VISITED
+
+    self.offset = row_id
+    self.url = url
+    self.title = title
+    self.host = hostname
+    self.visit_count = visit_count
+    self.visit_type = visit_type
+    if extra:
+      self.extra = extra
+
+
+class FirefoxDownload(time_events.TimestampEvent):
+  """Convenience class for a Firefox download event."""
+
+  DATA_TYPE = 'firefox:downloads:download'
+
+  def __init__(self, timestamp, usage, row_id, name, url, referrer, full_path,
+               temporary_location, received_bytes, total_bytes, mime_type):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp value.
+      usage: Timestamp description string.
+      row_id: The identifier of the corresponding row.
+      name: The name of the download.
+      url: The source URL of the download.
+      referrer: The referrer URL of the download.
+      full_path: The full path of the target of the download.
+      temporary_location: The temporary location of the download.
+      received_bytes: The number of bytes received.
+      total_bytes: The total number of bytes of the download.
+      mime_type: The mime type of the download.
+    """
+    super(FirefoxDownload, self).__init__(timestamp, usage)
+
+    self.offset = row_id
+    self.name = name
+    self.url = url
+    self.referrer = referrer
+    self.full_path = full_path
+    self.temporary_location = temporary_location
+    self.received_bytes = received_bytes
+    self.total_bytes = total_bytes
+    self.mime_type = mime_type
+
+
+class FirefoxHistoryPlugin(interface.SQLitePlugin):
+  """Parses a Firefox history file.
+
+  The Firefox history is stored in a SQLite database file named
+  places.sqlite.
+  """
+
+  NAME = 'firefox_history'
+  DESCRIPTION = u'Parser for Firefox history SQLite database files.'
+
+  # Define the needed queries.
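+  # The page visit query joins moz_places and moz_historyvisits so that each
+  # visit row also carries the URL, title, reversed host and visit count of
+  # the visited page.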
+ QUERIES = [ + (('SELECT moz_historyvisits.id, moz_places.url, moz_places.title, ' + 'moz_places.visit_count, moz_historyvisits.visit_date, ' + 'moz_historyvisits.from_visit, moz_places.rev_host, ' + 'moz_places.hidden, moz_places.typed, moz_historyvisits.visit_type ' + 'FROM moz_places, moz_historyvisits ' + 'WHERE moz_places.id = moz_historyvisits.place_id'), + 'ParsePageVisitedRow'), + (('SELECT moz_bookmarks.type, moz_bookmarks.title AS bookmark_title, ' + 'moz_bookmarks.dateAdded, moz_bookmarks.lastModified, ' + 'moz_places.url, moz_places.title AS places_title, ' + 'moz_places.rev_host, moz_places.visit_count, moz_bookmarks.id ' + 'FROM moz_places, moz_bookmarks WHERE moz_bookmarks.fk = moz_places.id ' + 'AND moz_bookmarks.type <> 3'), + 'ParseBookmarkRow'), + (('SELECT moz_items_annos.content, moz_items_annos.dateAdded, ' + 'moz_items_annos.lastModified, moz_bookmarks.title, ' + 'moz_places.url, moz_places.rev_host, moz_items_annos.id ' + 'FROM moz_items_annos, moz_bookmarks, moz_places ' + 'WHERE moz_items_annos.item_id = moz_bookmarks.id ' + 'AND moz_bookmarks.fk = moz_places.id'), + 'ParseBookmarkAnnotationRow'), + (('SELECT moz_bookmarks.id, moz_bookmarks.title,' + 'moz_bookmarks.dateAdded, moz_bookmarks.lastModified ' + 'FROM moz_bookmarks WHERE moz_bookmarks.type = 2'), + 'ParseBookmarkFolderRow')] + + # The required tables. + REQUIRED_TABLES = frozenset([ + 'moz_places', 'moz_historyvisits', 'moz_bookmarks', 'moz_items_annos']) + + # Cache queries. + URL_CACHE_QUERY = ( + 'SELECT h.id AS id, p.url, p.rev_host FROM moz_places p, ' + 'moz_historyvisits h WHERE p.id = h.place_id') + + def ParseBookmarkAnnotationRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a bookmark annotation row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + if row['dateAdded']: + event_object = FirefoxPlacesBookmarkAnnotation( + row['dateAdded'], eventdata.EventTimestamp.ADDED_TIME, + row['id'], row['title'], row['url'], row['content']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['lastModified']: + event_object = FirefoxPlacesBookmarkAnnotation( + row['lastModified'], eventdata.EventTimestamp.MODIFICATION_TIME, + row['id'], row['title'], row['url'], row['content']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + def ParseBookmarkFolderRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a bookmark folder row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. 
+    """
+    if not row['title']:
+      title = 'N/A'
+    else:
+      title = row['title']
+
+    if row['dateAdded']:
+      event_object = FirefoxPlacesBookmarkFolder(
+          row['dateAdded'], eventdata.EventTimestamp.ADDED_TIME,
+          row['id'], title)
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    if row['lastModified']:
+      event_object = FirefoxPlacesBookmarkFolder(
+          row['lastModified'], eventdata.EventTimestamp.MODIFICATION_TIME,
+          row['id'], title)
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+  def ParseBookmarkRow(
+      self, parser_context, row, file_entry=None, parser_chain=None,
+      query=None, **unused_kwargs):
+    """Parses a bookmark row.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: The row resulting from the query.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      query: Optional query string. The default is None.
+    """
+    hostname = self._ReverseHostname(row['rev_host'])
+
+    if row['dateAdded']:
+      event_object = FirefoxPlacesBookmark(
+          row['dateAdded'], eventdata.EventTimestamp.ADDED_TIME,
+          row['id'], row['type'], row['bookmark_title'], row['url'],
+          row['places_title'], hostname, row['visit_count'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    if row['lastModified']:
+      event_object = FirefoxPlacesBookmark(
+          row['lastModified'], eventdata.EventTimestamp.MODIFICATION_TIME,
+          row['id'], row['type'], row['bookmark_title'], row['url'],
+          row['places_title'], hostname, row['visit_count'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+  def ParsePageVisitedRow(
+      self, parser_context, row, file_entry=None, parser_chain=None,
+      query=None, cache=None, database=None, **unused_kwargs):
+    """Parses a page visited row.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: The row resulting from the query.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      query: Optional query string. The default is None.
+      cache: A cache object (instance of SQLiteCache).
+      database: A database object (instance of SQLiteDatabase).
+    """
+    # TODO: make extra conditional formatting.
+    extras = []
+    if row['from_visit']:
+      extras.append(u'visited from: {0}'.format(
+          self._GetUrl(row['from_visit'], cache, database)))
+
+    if row['hidden'] == '1':
+      extras.append('(url hidden)')
+
+    if row['typed'] == '1':
+      extras.append('(directly typed)')
+    else:
+      extras.append('(URL not typed directly)')
+
+    if row['visit_date']:
+      event_object = FirefoxPlacesPageVisitedEvent(
+          row['visit_date'], row['id'], row['url'], row['title'],
+          self._ReverseHostname(row['rev_host']), row['visit_count'],
+          row['visit_type'], extras)
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+  def _ReverseHostname(self, hostname):
+    """Reverses the hostname and strips the leading dot.
+
+    The hostname entry is reversed:
+      moc.elgoog.www.
+    Should be:
+      www.google.com
+
+    Args:
+      hostname: The reversed hostname.
+
+    Returns:
+      Reversed string without a leading dot.
+    """
+    if not hostname:
+      return ''
+
+    if len(hostname) > 1:
+      if hostname[-1] == '.':
+        return hostname[::-1][1:]
+      return hostname[::-1]
+    return hostname
+
+  def _GetUrl(self, url_id, cache, database):
+    """Return a URL from a reference to an entry in the from_visit column."""
+    url_cache_results = cache.GetResults('url')
+    if not url_cache_results:
+      cursor = database.cursor
+      result_set = cursor.execute(self.URL_CACHE_QUERY)
+      cache.CacheQueryResults(
+          result_set, 'url', 'id', ('url', 'rev_host'))
+      url_cache_results = cache.GetResults('url')
+
+    url, reverse_host = url_cache_results.get(url_id, [u'', u''])
+
+    if not url:
+      return u''
+
+    hostname = self._ReverseHostname(reverse_host)
+    return u'{:s} ({:s})'.format(url, hostname)
+
+
+class FirefoxDownloadsPlugin(interface.SQLitePlugin):
+  """Parses a Firefox downloads file.
+
+  The Firefox downloads history is stored in a SQLite database file named
+  downloads.sqlite.
+  """
+
+  NAME = 'firefox_downloads'
+  DESCRIPTION = u'Parser for Firefox downloads SQLite database files.'
+
+  # Define the needed queries.
+  QUERIES = [
+      (('SELECT moz_downloads.id, moz_downloads.name, moz_downloads.source, '
+        'moz_downloads.target, moz_downloads.tempPath, '
+        'moz_downloads.startTime, moz_downloads.endTime, moz_downloads.state, '
+        'moz_downloads.referrer, moz_downloads.currBytes, '
+        'moz_downloads.maxBytes, moz_downloads.mimeType '
+        'FROM moz_downloads'),
+       'ParseDownloadsRow')]
+
+  # The required tables.
+  REQUIRED_TABLES = frozenset(['moz_downloads'])
+
+  def ParseDownloadsRow(
+      self, parser_context, row, file_entry=None, parser_chain=None,
+      query=None, **unused_kwargs):
+    """Parses a downloads row.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: The row resulting from the query.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      query: Optional query string. The default is None.
+    """
+    if row['startTime']:
+      event_object = FirefoxDownload(
+          row['startTime'], eventdata.EventTimestamp.START_TIME,
+          row['id'], row['name'], row['source'], row['referrer'],
+          row['target'], row['tempPath'], row['currBytes'], row['maxBytes'],
+          row['mimeType'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    if row['endTime']:
+      event_object = FirefoxDownload(
+          row['endTime'], eventdata.EventTimestamp.END_TIME,
+          row['id'], row['name'], row['source'], row['referrer'],
+          row['target'], row['tempPath'], row['currBytes'], row['maxBytes'],
+          row['mimeType'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+
+sqlite.SQLiteParser.RegisterPlugins(
+    [FirefoxHistoryPlugin, FirefoxDownloadsPlugin])
diff --git a/plaso/parsers/sqlite_plugins/firefox_cookies.py b/plaso/parsers/sqlite_plugins/firefox_cookies.py
new file mode 100644
index 0000000..b9ee8c9
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/firefox_cookies.py
@@ -0,0 +1,163 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parser for the Firefox Cookie database."""
+
+from plaso.events import time_events
+from plaso.lib import errors
+from plaso.lib import eventdata
+from plaso.lib import timelib
+# Register the cookie plugins.
+from plaso.parsers import cookie_plugins  # pylint: disable=unused-import
+from plaso.parsers import sqlite
+from plaso.parsers.cookie_plugins import interface as cookie_interface
+from plaso.parsers.sqlite_plugins import interface
+
+
+class FirefoxCookieEvent(time_events.TimestampEvent):
+  """Convenience class for a Firefox Cookie event."""
+
+  DATA_TYPE = 'firefox:cookie:entry'
+
+  def __init__(
+      self, timestamp, usage, identifier, hostname, cookie_name, value, path,
+      secure, httponly):
+    """Initializes the event.
+
+    Args:
+      timestamp: The timestamp value.
+      usage: Timestamp description string.
+      identifier: The row identifier.
+      hostname: The hostname of the host that set the cookie value.
+      cookie_name: The name field of the cookie.
+      value: The value of the cookie.
+      path: A URI of the page that set the cookie.
+      secure: Indication if this cookie should only be transmitted over a
+              secure channel.
+      httponly: An indication that the cookie cannot be accessed through
+                client side script.
+    """
+    super(FirefoxCookieEvent, self).__init__(timestamp, usage)
+    if hostname.startswith('.'):
+      hostname = hostname[1:]
+
+    self.offset = identifier
+    self.host = hostname
+    self.cookie_name = cookie_name
+    self.data = value
+    self.path = path
+    self.secure = True if secure else False
+    self.httponly = True if httponly else False
+
+    if self.secure:
+      scheme = u'https'
+    else:
+      scheme = u'http'
+
+    self.url = u'{0:s}://{1:s}{2:s}'.format(scheme, hostname, path)
+
+
+class FirefoxCookiePlugin(interface.SQLitePlugin):
+  """Parse Firefox Cookies file."""
+
+  NAME = 'firefox_cookies'
+  DESCRIPTION = u'Parser for Firefox cookies SQLite database files.'
+
+  # Define the needed queries.
+  QUERIES = [
+      (('SELECT id, baseDomain, name, value, host, path, expiry, lastAccessed, '
+        'creationTime, isSecure, isHttpOnly FROM moz_cookies'),
+       'ParseCookieRow')]
+
+  # The required tables.
+  REQUIRED_TABLES = frozenset(['moz_cookies'])
+
+  # Point to a few sources for URL information.
+  URLS = [
+      (u'https://hg.mozilla.org/mozilla-central/file/349a2f003529/netwerk/'
+       u'cookie/nsCookie.h')]
+
+  def __init__(self):
+    """Initializes a plugin object."""
+    super(FirefoxCookiePlugin, self).__init__()
+    self._cookie_plugins = cookie_interface.GetPlugins()
+
+  def ParseCookieRow(
+      self, parser_context, row, file_entry=None, parser_chain=None,
+      query=None, **unused_kwargs):
+    """Parses a cookie row.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: The row resulting from the query.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      query: Optional query string. The default is None.
+    """
+    if row['creationTime']:
+      event_object = FirefoxCookieEvent(
+          row['creationTime'], eventdata.EventTimestamp.CREATION_TIME,
+          row['id'], row['host'], row['name'], row['value'], row['path'],
+          row['isSecure'], row['isHttpOnly'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    if row['lastAccessed']:
+      event_object = FirefoxCookieEvent(
+          row['lastAccessed'], eventdata.EventTimestamp.ACCESS_TIME, row['id'],
+          row['host'], row['name'], row['value'], row['path'], row['isSecure'],
+          row['isHttpOnly'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    if row['expiry']:
+      # Expiry time (nsCookieService::GetExpiry in
+      # netwerk/cookie/nsCookieService.cpp).
+      # It is calculated as the difference between the server time and the
+      # time the server wants the cookie to expire, added to the client time.
+      # This localizes the client time regardless of whether or not the TZ
+      # environment variable was set on the client.
+      timestamp = timelib.Timestamp.FromPosixTime(row['expiry'])
+      event_object = FirefoxCookieEvent(
+          timestamp, u'Cookie Expires', row['id'], row['host'], row['name'],
+          row['value'], row['path'], row['isSecure'], row['isHttpOnly'])
+      parser_context.ProduceEvent(
+          event_object, query=query, parser_chain=parser_chain,
+          file_entry=file_entry)
+
+    # Go through all the cookie plugins to see if any specific parsing is
+    # needed.
+    hostname = row['host']
+    if hostname.startswith('.'):
+      hostname = hostname[1:]
+    url = u'http{0:s}://{1:s}{2:s}'.format(
+        u's' if row['isSecure'] else u'', hostname, row['path'])
+
+    for cookie_plugin in self._cookie_plugins:
+      try:
+        cookie_plugin.Process(
+            parser_context, cookie_name=row['name'], cookie_data=row['value'],
+            url=url, file_entry=file_entry, parser_chain=parser_chain)
+      except errors.WrongPlugin:
+        pass
+
+
+sqlite.SQLiteParser.RegisterPlugin(FirefoxCookiePlugin)
diff --git a/plaso/parsers/sqlite_plugins/firefox_cookies_test.py b/plaso/parsers/sqlite_plugins/firefox_cookies_test.py
new file mode 100644
index 0000000..77397cd
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/firefox_cookies_test.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Firefox cookie database plugin."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import firefox_cookies as firefox_cookies_formatter
+from plaso.lib import timelib_test
+from plaso.parsers.sqlite_plugins import firefox_cookies
+from plaso.parsers.sqlite_plugins import test_lib
+
+
+class FirefoxCookiesPluginTest(test_lib.SQLitePluginTestCase):
+  """Tests for the Firefox cookie database plugin."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._plugin = firefox_cookies.FirefoxCookiePlugin()
+
+  def testProcess(self):
+    """Tests the Process function on a Firefox 29 cookie database file."""
+    test_file = self._GetTestFilePath(['firefox_cookies.sqlite'])
+    event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+        self._plugin, test_file)
+
+    event_objects = []
+    extra_objects = []
+
+    # sqlite> SELECT COUNT(id) FROM moz_cookies;
+    # 90
+    # Thus the cookie database contains 90 entries, yielding:
+    #   90 Last Access Time
+    #   90 Cookie Expires
+    #   90 Creation Time
+    #
+    # And then in addition the following events are added due to cookie
+    # plugins (TODO filter these out since adding a new cookie plugin will
+    # change this number and thus affect this test):
+    #   15 Last Visited Time
+    #    5 Analytics Previous Time
+    #    5 Analytics Creation Time
+    #
+    # In total: 90 * 3 + 15 + 5 + 5 = 295 events.
+    for event_object in self._GetEventObjectsFromQueue(event_queue_consumer):
+      if isinstance(event_object, firefox_cookies.FirefoxCookieEvent):
+        event_objects.append(event_object)
+      else:
+        extra_objects.append(event_object)
+
+    self.assertEquals(len(event_objects), 90 * 3)
+    self.assertGreaterEqual(len(extra_objects), 25)
+
+    # Check one greenqloud.com event.
+    event_object = event_objects[32]
+    self.assertEquals(
+        event_object.timestamp_desc, 'Cookie Expires')
+    self.assertEquals(event_object.host, u's.greenqloud.com')
+    self.assertEquals(event_object.cookie_name, u'__utma')
+    self.assertFalse(event_object.httponly)
+    self.assertEquals(event_object.url, u'http://s.greenqloud.com/')
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2015-10-30 21:56:03')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_msg = (
+        u'http://s.greenqloud.com/ (__utma) Flags: [HTTP only]: False')
+    expected_short = u's.greenqloud.com (__utma)'
+    self._TestGetMessageStrings(event_object, expected_msg, expected_short)
+
+    # Check one of the visits to pubmatic.com.
+    event_object = event_objects[62]
+    self.assertEquals(
+        event_object.timestamp_desc, u'Cookie Expires')
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-11-29 21:56:04')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    self.assertEquals(event_object.url, u'http://pubmatic.com/')
+    self.assertEquals(event_object.path, u'/')
+    self.assertFalse(event_object.secure)
+
+    expected_msg = (
+        u'http://pubmatic.com/ (KRTBCOOKIE_391) Flags: [HTTP only]: False')
+    self._TestGetMessageStrings(
+        event_object, expected_msg, u'pubmatic.com (KRTBCOOKIE_391)')
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/sqlite_plugins/firefox_test.py b/plaso/parsers/sqlite_plugins/firefox_test.py
new file mode 100644
index 0000000..1a57889
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/firefox_test.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Mozilla Firefox history database plugin.""" + +import collections +import unittest + +# pylint: disable=unused-import +from plaso.formatters import firefox as firefox_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import firefox +from plaso.parsers.sqlite_plugins import test_lib + + +class FirefoxHistoryPluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Mozilla Firefox history database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = firefox.FirefoxHistoryPlugin() + + def testProcessPriorTo24(self): + """Tests the Process function on a Firefox History database file.""" + # This is probably version 23 but potentially an older version. + test_file = self._GetTestFilePath(['places.sqlite']) + cache = sqlite.SQLiteCache() + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file, cache) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The places.sqlite file contains 205 events (1 page visit, + # 2 x 91 bookmark records, 2 x 3 bookmark annotations, + # 2 x 8 bookmark folders). + # However there are three events that do not have a timestamp + # so the test file will show 202 extracted events. + self.assertEquals(len(event_objects), 202) + + # Check the first page visited event. + event_object = event_objects[0] + + self.assertEquals(event_object.data_type, 'firefox:places:page_visited') + + self.assertEquals(event_object.timestamp_desc, + eventdata.EventTimestamp.PAGE_VISITED) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-07-01 11:16:21.371935') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_url = u'http://news.google.com/' + self.assertEquals(event_object.url, expected_url) + + expected_title = u'Google News' + self.assertEquals(event_object.title, expected_title) + + expected_msg = ( + u'{0:s} ({1:s}) [count: 1] Host: news.google.com ' + u'(URL not typed directly) Transition: TYPED').format( + expected_url, expected_title) + expected_short = u'URL: {}'.format(expected_url) + + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + # Check the first bookmark event. + event_object = event_objects[1] + + self.assertEquals(event_object.data_type, 'firefox:places:bookmark') + + self.assertEquals(event_object.timestamp_desc, + eventdata.EventTimestamp.ADDED_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + u'2011-07-01 11:13:59.266344+00:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + # Check the second bookmark event. 
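+    # Bookmark rows produce up to two events: one for the dateAdded value and
+    # one for the lastModified value; this second event is the lastModified
+    # one.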
+ event_object = event_objects[2] + + self.assertEquals(event_object.data_type, 'firefox:places:bookmark') + + self.assertEquals(event_object.timestamp_desc, + eventdata.EventTimestamp.MODIFICATION_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + u'2011-07-01 11:13:59.267198+00:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_url = ( + u'place:folder=BOOKMARKS_MENU&folder=UNFILED_BOOKMARKS&folder=TOOLBAR&' + u'sort=12&excludeQueries=1&excludeItemIfParentHasAnnotation=livemark%2F' + u'feedURI&maxResults=10&queryType=1') + self.assertEquals(event_object.url, expected_url) + + expected_title = u'Recently Bookmarked' + self.assertEquals(event_object.title, expected_title) + + expected_msg = ( + u'Bookmark URL {0:s} ({1:s}) [folder=BOOKMARKS_MENU&' + u'folder=UNFILED_BOOKMARKS&folder=TOOLBAR&sort=12&excludeQueries=1&' + u'excludeItemIfParentHasAnnotation=livemark%2FfeedURI&maxResults=10&' + u'queryType=1] visit count 0').format( + expected_title, expected_url) + expected_short = ( + u'Bookmarked Recently Bookmarked ' + u'(place:folder=BOOKMARKS_MENU&folder=UNFILED_BO...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + # Check the first bookmark annotation event. + event_object = event_objects[183] + + self.assertEquals( + event_object.data_type, 'firefox:places:bookmark_annotation') + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + u'2011-07-01 11:13:59.267146+00:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + # Check another bookmark annotation event. + event_object = event_objects[184] + + self.assertEquals( + event_object.data_type, 'firefox:places:bookmark_annotation') + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + u'2011-07-01 11:13:59.267605+00:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_url = u'place:sort=14&type=6&maxResults=10&queryType=1' + self.assertEquals(event_object.url, expected_url) + + expected_title = u'Recent Tags' + self.assertEquals(event_object.title, expected_title) + + expected_msg = ( + u'Bookmark Annotation: [RecentTags] to bookmark ' + u'[{0:s}] ({1:s})').format( + expected_title, expected_url) + expected_short = u'Bookmark Annotation: Recent Tags' + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + # Check the second last bookmark folder event. + event_object = event_objects[200] + + self.assertEquals(event_object.data_type, 'firefox:places:bookmark_folder') + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.ADDED_TIME) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + u'2011-03-21 10:05:01.553774+00:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + # Check the last bookmark folder event. 
+    event_object = event_objects[201]
+
+    self.assertEquals(
+        event_object.data_type, 'firefox:places:bookmark_folder')
+
+    self.assertEquals(
+        event_object.timestamp_desc,
+        eventdata.EventTimestamp.MODIFICATION_TIME)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        u'2011-07-01 11:14:11.766851+00:00')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_title = u'Latest Headlines'
+    self.assertEquals(event_object.title, expected_title)
+
+    expected_msg = expected_title
+    expected_short = expected_title
+    self._TestGetMessageStrings(event_object, expected_msg, expected_short)
+
+  def testProcessVersion25(self):
+    """Tests the Process function on a Firefox History database file v 25."""
+    test_file = self._GetTestFilePath(['places_new.sqlite'])
+    cache = sqlite.SQLiteCache()
+    event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+        self._plugin, test_file, cache)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    # The places.sqlite file contains 84 events:
+    #   34 page visits
+    #   28 bookmarks
+    #   14 bookmark folders
+    #    8 annotations
+    self.assertEquals(len(event_objects), 84)
+    counter = collections.Counter()
+    for event_object in event_objects:
+      counter[event_object.data_type] += 1
+
+    self.assertEquals(counter['firefox:places:bookmark'], 28)
+    self.assertEquals(counter['firefox:places:page_visited'], 34)
+    self.assertEquals(counter['firefox:places:bookmark_folder'], 14)
+    self.assertEquals(counter['firefox:places:bookmark_annotation'], 8)
+
+    random_event = event_objects[10]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-10-30 21:57:11.281942')
+    self.assertEquals(random_event.timestamp, expected_timestamp)
+
+    expected_short = u'URL: http://code.google.com/p/plaso'
+    expected_msg = (
+        u'http://code.google.com/p/plaso [count: 1] Host: code.google.com '
+        u'(URL not typed directly) Transition: TYPED')
+
+    self._TestGetMessageStrings(random_event, expected_msg, expected_short)
+
+
+class FirefoxDownloadsPluginTest(test_lib.SQLitePluginTestCase):
+  """Tests for the Mozilla Firefox downloads database plugin."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._plugin = firefox.FirefoxDownloadsPlugin()
+
+  def testProcessVersion25(self):
+    """Tests the Process function on a Firefox Downloads database file."""
+    test_file = self._GetTestFilePath(['downloads.sqlite'])
+    cache = sqlite.SQLiteCache()
+    event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+        self._plugin, test_file, cache)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    # The downloads.sqlite file contains 2 events (1 download).
+    self.assertEquals(len(event_objects), 2)
+
+    # Check the download start event.
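+    # A download row yields a start event (startTime) and an end event
+    # (endTime); event_objects[0] is the start event.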
+    event_object = event_objects[0]
+
+    self.assertEquals(event_object.data_type, 'firefox:downloads:download')
+
+    self.assertEquals(
+        event_object.timestamp_desc, eventdata.EventTimestamp.START_TIME)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        u'2013-07-18 18:59:59.312000+00:00')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_url = (
+        u'https://plaso.googlecode.com/files/'
+        u'plaso-static-1.0.1-win32-vs2008.zip')
+    self.assertEquals(event_object.url, expected_url)
+
+    expected_full_path = u'file:///D:/plaso-static-1.0.1-win32-vs2008.zip'
+    self.assertEquals(event_object.full_path, expected_full_path)
+
+    self.assertEquals(event_object.received_bytes, 15974599)
+    self.assertEquals(event_object.total_bytes, 15974599)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/sqlite_plugins/gdrive.py b/plaso/parsers/sqlite_plugins/gdrive.py
new file mode 100644
index 0000000..ac435d1
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/gdrive.py
@@ -0,0 +1,268 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a parser for the Google Drive snapshots.
+
+The Google Drive snapshots are stored in SQLite database files named
+snapshot.db.
+"""
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.parsers import sqlite
+from plaso.parsers.sqlite_plugins import interface
+
+
+__author__ = 'David Nides (david.nides@gmail.com)'
+
+
+class GoogleDriveSnapshotCloudEntryEvent(time_events.PosixTimeEvent):
+  """Convenience class for a Google Drive snapshot cloud entry."""
+
+  DATA_TYPE = 'gdrive:snapshot:cloud_entry'
+
+  # TODO: this could be moved to the formatter.
+  # The following definition for values can be found on Patrick Olson's blog:
+  # http://www.sysforensics.org/2012/05/google-drive-forensics-notes.html
+  _DOC_TYPES = {
+      0: u'FOLDER',
+      1: u'FILE',
+      2: u'PRESENTATION',
+      3: u'UNKNOWN',
+      4: u'SPREADSHEET',
+      5: u'DRAWING',
+      6: u'DOCUMENT',
+      7: u'TABLE',
+  }
+
+  def __init__(self, posix_time, usage, url, path, size, doc_type, shared):
+    """Initializes the event.
+
+    Args:
+      posix_time: The POSIX time value.
+      usage: The description of the usage of the time value.
+      url: The URL of the file as in the cloud.
+      path: The path of the file.
+      size: The size of the file.
+      doc_type: Integer value containing the document type.
+      shared: A string indicating whether or not this is a shared document.
+ """ + super(GoogleDriveSnapshotCloudEntryEvent, self).__init__( + posix_time, usage) + + self.url = url + self.path = path + self.size = size + self.document_type = self._DOC_TYPES.get(doc_type, u'UNKNOWN') + self.shared = shared + + +class GoogleDriveSnapshotLocalEntryEvent(time_events.PosixTimeEvent): + """Convenience class for a Google Drive snapshot local entry event.""" + + DATA_TYPE = 'gdrive:snapshot:local_entry' + + def __init__(self, posix_time, local_path, size): + """Initializes the event object. + + Args: + posix_time: The POSIX time value. + local_path: The local path of the file. + size: The size of the file. + """ + super(GoogleDriveSnapshotLocalEntryEvent, self).__init__( + posix_time, eventdata.EventTimestamp.MODIFICATION_TIME) + + self.path = local_path + self.size = size + + +class GoogleDrivePlugin(interface.SQLitePlugin): + """SQLite plugin for Google Drive snapshot.db files.""" + + NAME = 'google_drive' + DESCRIPTION = u'Parser for Google Drive SQLite database files.' + + # Define the needed queries. + QUERIES = [ + ((u'SELECT e.resource_id, e.filename, e.modified, e.created, e.size, ' + u'e.doc_type, e.shared, e.checksum, e.url, r.parent_resource_id FROM ' + u'cloud_entry AS e, cloud_relations AS r WHERE r.child_resource_id = ' + u'e.resource_id AND e.modified IS NOT NULL;'), 'ParseCloudEntryRow'), + ((u'SELECT inode_number, filename, modified, checksum, size FROM ' + u'local_entry WHERE modified IS NOT NULL;'), 'ParseLocalEntryRow')] + + # The required tables. + REQUIRED_TABLES = frozenset([ + 'cloud_entry', 'cloud_relations', 'local_entry', 'local_relations', + 'mapping', 'overlay_status']) + + # Queries used to build cache. + LOCAL_PATH_CACHE_QUERY = ( + u'SELECT r.child_inode_number, r.parent_inode_number, e.filename FROM ' + u'local_relations AS r, local_entry AS e WHERE r.child_inode_number = ' + u'e.inode_number') + CLOUD_PATH_CACHE_QUERY = ( + u'SELECT e.filename, e.resource_id, r.parent_resource_id AS parent ' + u'FROM cloud_entry AS e, cloud_relations AS r WHERE e.doc_type = 0 ' + u'AND e.resource_id = r.child_resource_id') + + def GetLocalPath(self, inode, cache, database): + """Return local path for a given inode. + + Args: + inode: The inode number for the file. + cache: A cache object (instance of SQLiteCache). + database: A database object (instance of SQLiteDatabase). + + Returns: + A full path, including the filename of the given inode value. + """ + local_path = cache.GetResults('local_path') + if not local_path: + cursor = database.cursor + results = cursor.execute(self.LOCAL_PATH_CACHE_QUERY) + cache.CacheQueryResults( + results, 'local_path', 'child_inode_number', + ('parent_inode_number', 'filename')) + local_path = cache.GetResults('local_path') + + parent, path = local_path.get(inode, [None, None]) + + # TODO: Read the local_sync_root from the sync_config.db and use that + # for a root value. + root_value = u'%local_sync_root%/' + + if not path: + return root_value + + paths = [] + while path: + paths.append(path) + parent, path = local_path.get(parent, [None, None]) + + if not paths: + return root_value + + # Paths are built top level to root so we need to reverse the list to + # represent them in the traditional order. + paths.reverse() + return root_value + u'/'.join(paths) + + def GetCloudPath(self, resource_id, cache, database): + """Return cloud path given a resource id. + + Args: + resource_id: The resource_id for the file. + cache: The local cache object. + database: A database object (instance of SQLiteDatabase). 
+ + Returns: + A full path to the resource value. + """ + cloud_path = cache.GetResults('cloud_path') + if not cloud_path: + cursor = database.cursor + results = cursor.execute(self.CLOUD_PATH_CACHE_QUERY) + cache.CacheQueryResults( + results, 'cloud_path', 'resource_id', ('filename', 'parent')) + cloud_path = cache.GetResults('cloud_path') + + if resource_id == u'folder:root': + return u'/' + + paths = [] + parent_path, parent_id = cloud_path.get(resource_id, [u'', u'']) + while parent_path: + if parent_path == u'folder:root': + break + paths.append(parent_path) + parent_path, parent_id = cloud_path.get(parent_id, [u'', u'']) + + if not paths: + return u'/' + + # Paths are built top level to root so we need to reverse the list to + # represent them in the traditional order. + paths.reverse() + return u'/{0:s}/'.format(u'/'.join(paths)) + + def ParseCloudEntryRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + cache=None, database=None, **unused_kwargs): + """Parses a cloud entry row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + cache: The local cache object. + database: The database object. + """ + cloud_path = self.GetCloudPath(row['parent_resource_id'], cache, database) + cloud_filename = u'{0:s}{1:s}'.format(cloud_path, row['filename']) + + if row['shared']: + shared = 'Shared' + else: + shared = 'Private' + + event_object = GoogleDriveSnapshotCloudEntryEvent( + row['modified'], eventdata.EventTimestamp.MODIFICATION_TIME, + row['url'], cloud_filename, row['size'], row['doc_type'], shared) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['created']: + event_object = GoogleDriveSnapshotCloudEntryEvent( + row['created'], eventdata.EventTimestamp.CREATION_TIME, + row['url'], cloud_filename, row['size'], row['doc_type'], shared) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + def ParseLocalEntryRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + cache=None, database=None, **unused_kwargs): + """Parses a local entry row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + cache: The local cache object (instance of SQLiteCache). + database: A database object (instance of SQLiteDatabase). 
+ """ + local_path = self.GetLocalPath(row['inode_number'], cache, database) + + event_object = GoogleDriveSnapshotLocalEntryEvent( + row['modified'], local_path, row['size']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(GoogleDrivePlugin) diff --git a/plaso/parsers/sqlite_plugins/gdrive_test.py b/plaso/parsers/sqlite_plugins/gdrive_test.py new file mode 100644 index 0000000..e6889b0 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/gdrive_test.py @@ -0,0 +1,104 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Google Drive database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import gdrive as gdrive_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import gdrive +from plaso.parsers.sqlite_plugins import test_lib + + +class GoogleDrivePluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Google Drive database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = gdrive.GoogleDrivePlugin() + + def testProcess(self): + """Tests the Process function on a Google Drive database file.""" + test_file = self._GetTestFilePath(['snapshot.db']) + cache = sqlite.SQLiteCache() + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file, cache=cache) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 30) + + # Let's verify that we've got the correct balance of cloud and local + # entry events. + # 10 files mounting to: + # 20 Cloud Entries (two timestamps per file). + # 10 Local Entries (one timestamp per file). + local_entries = [] + cloud_entries = [] + for event_object in event_objects: + if event_object.data_type == 'gdrive:snapshot:local_entry': + local_entries.append(event_object) + else: + cloud_entries.append(event_object) + self.assertEquals(len(local_entries), 10) + self.assertEquals(len(cloud_entries), 20) + + # Test one local and one cloud entry. 
+ event_object = local_entries[5] + + file_path = ( + u'%local_sync_root%/Top Secret/Enn meiri ' + u'leyndarm\xe1l/S\xfdnileiki - \xd6rverpi.gdoc') + self.assertEquals(event_object.path, file_path) + + expected_msg = u'File Path: {} Size: 184'.format(file_path) + + self._TestGetMessageStrings(event_object, expected_msg, file_path) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-01-28 00:11:25') + self.assertEquals(event_object.timestamp, expected_timestamp) + + event_object = cloud_entries[16] + + self.assertEquals(event_object.document_type, u'DOCUMENT') + self.assertEquals( + event_object.timestamp_desc, + eventdata.EventTimestamp.MODIFICATION_TIME) + self.assertEquals(event_object.url, ( + u'https://docs.google.com/document/d/' + u'1ypXwXhQWliiMSQN9S5M0K6Wh39XF4Uz4GmY-njMf-Z0/edit?usp=docslist_api')) + + expected_msg = ( + u'File Path: /Almenningur/Saklausa hli\xf0in [Private] Size: 0 URL: ' + u'https://docs.google.com/document/d/' + u'1ypXwXhQWliiMSQN9S5M0K6Wh39XF4Uz4GmY-njMf-Z0/edit?usp=docslist_api ' + u'Type: DOCUMENT') + expected_short = u'/Almenningur/Saklausa hli\xf0in' + + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-01-28 00:12:27') + self.assertEquals(event_object.timestamp, expected_timestamp) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/interface.py b/plaso/parsers/sqlite_plugins/interface.py new file mode 100644 index 0000000..b77003b --- /dev/null +++ b/plaso/parsers/sqlite_plugins/interface.py @@ -0,0 +1,121 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a SQLite parser.""" + +import logging + +import sqlite3 + +from plaso.lib import errors +from plaso.parsers import plugins + + +class SQLitePlugin(plugins.BasePlugin): + """A SQLite plugin for Plaso.""" + + NAME = 'sqlite' + DESCRIPTION = u'Parser for SQLite database files.' + + # Queries to be executed. + # Should be a list of tuples with two entries, SQLCommand and callback + # function name. + QUERIES = [] + + # List of tables that should be present in the database, for verification. + REQUIRED_TABLES = frozenset([]) + + def GetEntries( + self, parser_context, file_entry=None, parser_chain=None, cache=None, + database=None, **kwargs): + """Extracts event objects from a SQLite database. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + cache: A SQLiteCache object. + database: A database object (instance of SQLiteDatabase). 
+ """ + for query, callback_method in self.QUERIES: + try: + callback = getattr(self, callback_method, None) + if callback is None: + logging.warning( + u'[{0:s}] missing callback method: {1:s} for query: {2:s}'.format( + self.NAME, callback_method, query)) + continue + + cursor = database.cursor + sql_results = cursor.execute(query) + row = sql_results.fetchone() + + while row: + callback( + parser_context, row, query=query, cache=cache, database=database, + file_entry=file_entry, parser_chain=parser_chain) + + row = sql_results.fetchone() + + except sqlite3.DatabaseError as exception: + logging.debug(u'SQLite error occured: {0:s}'.format(exception)) + + def Process( + self, parser_context, file_entry=None, parser_chain=None, cache=None, + database=None, **kwargs): + """Determine if this is the right plugin for this database. + + This function takes a SQLiteDatabase object and compares the list + of required tables against the available tables in the database. + If all the tables defined in REQUIRED_TABLES are present in the + database then this plugin is considered to be the correct plugin + and the function will return back a generator that yields event + objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + cache: A SQLiteCache object. + database: A database object (instance of SQLiteDatabase). + + Raises: + errors.WrongPlugin: If the database does not contain all the tables + defined in the REQUIRED_TABLES set. + ValueError: If the database attribute is not passed in. + """ + if database is None: + raise ValueError(u'Database is not set.') + + if not frozenset(database.tables) >= self.REQUIRED_TABLES: + raise errors.WrongPlugin( + u'Not the correct database tables for: {0:s}'.format(self.NAME)) + + # This will raise if unhandled keyword arguments are passed. + super(SQLitePlugin, self).Process(parser_context, **kwargs) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + self.GetEntries( + parser_context, cache=cache, database=database, file_entry=file_entry, + parser_chain=parser_chain) diff --git a/plaso/parsers/sqlite_plugins/ls_quarantine.py b/plaso/parsers/sqlite_plugins/ls_quarantine.py new file mode 100644 index 0000000..9e4bad1 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/ls_quarantine.py @@ -0,0 +1,90 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Plugin for the Mac OS X launch services quarantine events.""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +class LsQuarantineEvent(time_events.CocoaTimeEvent): + """Convenience class for a Mac OS X launch services quarantine event.""" + DATA_TYPE = 'macosx:lsquarantine' + + # TODO: describe more clearly what the data value contains. + def __init__(self, cocoa_time, url, user_agent, data): + """Initializes the event object. + + Args: + cocoa_time: The Cocoa time value. + url: The original URL of the file. + user_agent: The user agent that was used to download the file. + data: The data. + """ + super(LsQuarantineEvent, self).__init__( + cocoa_time, eventdata.EventTimestamp.FILE_DOWNLOADED) + + self.url = url + self.agent = user_agent + self.data = data + + +class LsQuarantinePlugin(interface.SQLitePlugin): + """Parses the launch services quarantine events database. + + The LS quarantine events are stored in SQLite database files named + /Users//Library/Preferences/\ + QuarantineEvents.com.apple.LaunchServices + """ + + NAME = 'ls_quarantine' + DESCRIPTION = u'Parser for LS quarantine events SQLite database files.' + + # Define the needed queries. + QUERIES = [ + (('SELECT LSQuarantineTimestamp AS Time, LSQuarantine' + 'AgentName AS Agent, LSQuarantineOriginURLString AS URL, ' + 'LSQuarantineDataURLString AS Data FROM LSQuarantineEvent ' + 'ORDER BY Time'), 'ParseLSQuarantineRow')] + + # The required tables. + REQUIRED_TABLES = frozenset(['LSQuarantineEvent']) + + def ParseLSQuarantineRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a launch services quarantine event row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + event_object = LsQuarantineEvent( + row['Time'], row['URL'], row['Agent'], row['Data']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(LsQuarantinePlugin) diff --git a/plaso/parsers/sqlite_plugins/ls_quarantine_test.py b/plaso/parsers/sqlite_plugins/ls_quarantine_test.py new file mode 100644 index 0000000..6c56807 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/ls_quarantine_test.py @@ -0,0 +1,90 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the LS Quarantine database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import ls_quarantine as ls_quarantine_formatter +from plaso.lib import timelib_test +from plaso.parsers.sqlite_plugins import ls_quarantine +from plaso.parsers.sqlite_plugins import test_lib + + +class LSQuarantinePluginTest(test_lib.SQLitePluginTestCase): + """Tests for the LS Quarantine database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = ls_quarantine.LsQuarantinePlugin() + + def testProcess(self): + """Tests the Process function on a LS Quarantine database file.""" + test_file = self._GetTestFilePath(['quarantine.db']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The quarantine database contains 14 event_objects. + self.assertEquals(len(event_objects), 14) + + # Examine a VLC event. + event_object = event_objects[3] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-08 21:12:03') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.agent, u'Google Chrome') + vlc_url = ( + u'http://download.cnet.com/VLC-Media-Player/3001-2139_4-10210434.html' + u'?spi=40ab24d3c71594a5017d74be3b0c946c') + self.assertEquals(event_object.url, vlc_url) + + self.assertTrue(u'vlc-2.0.7-intel64.dmg' in event_object.data) + + # Examine a MacKeeper event. + event_object = event_objects[9] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-12 19:28:58') + self.assertEquals(event_object.timestamp, expected_timestamp) + + # Examine a SpeedTest event. + event_object = event_objects[10] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-12 19:30:16') + self.assertEquals(event_object.timestamp, expected_timestamp) + + speedtest_message = ( + u'[Google Chrome] Downloaded: http://mackeeperapp.zeobit.com/aff/' + u'speedtest.net.6/download.php?affid=460245286&trt=5&utm_campaign=' + u'3ES&tid_ext=P107fSKcSfqpMbcP3sI4fhKmeMchEB3dkAGpX4YIsvM;US;L;1 ' + u'') + speedtest_short = ( + u'http://mackeeperapp.zeobit.com/aff/speedtest.net.6/download.php?' + u'affid=4602452...') + + self._TestGetMessageStrings( + event_object, speedtest_message, speedtest_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/mac_document_versions.py b/plaso/parsers/sqlite_plugins/mac_document_versions.py new file mode 100644 index 0000000..48336e5 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/mac_document_versions.py @@ -0,0 +1,114 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Parser for the Mac OS X Document Versions files.""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class MacDocumentVersionsEvent(time_events.PosixTimeEvent): + """Convenience class for a entry from the Document Versions database.""" + + DATA_TYPE = 'mac:document_versions:file' + + def __init__(self, posix_time, name, path, version_path, last_time, user_sid): + """Initializes the event object. + + Args: + posix_time: The POSIX time value. + name: name of the original file. + path: path from the original file. + version_path: path to the version copy of the original file. + last_time: the system user ID of the user that opened the file. + user_sid: identification user ID that open the file. + """ + super(MacDocumentVersionsEvent, self).__init__( + posix_time, eventdata.EventTimestamp.CREATION_TIME) + + self.name = name + self.path = path + self.version_path = version_path + # TODO: shouldn't this be a separate event? + self.last_time = last_time + self.user_sid = unicode(user_sid) + + +class MacDocumentVersionsPlugin(interface.SQLitePlugin): + """Parse the Mac OS X Document Versions SQLite database..""" + + NAME = 'mac_document_versions' + DESCRIPTION = u'Parser for document revisions SQLite database files.' + + # Define the needed queries. + # name: name from the original file. + # path: path from the original file (include the file) + # last_time: last time when the file was replicated. + # version_path: path where the version is stored. + # version_time: the timestamp when the version was created. + QUERIES = [ + (('SELECT f.file_name AS name, f.file_path AS path, ' + 'f.file_last_seen AS last_time, g.generation_path AS version_path, ' + 'g.generation_add_time AS version_time FROM files f, generations g ' + 'WHERE f.file_storage_id = g.generation_storage_id;'), + 'DocumentVersionsRow')] + + # The required tables for the query. + REQUIRED_TABLES = frozenset(['files', 'generations']) + + # The SQL field path is the relative path from DocumentRevisions. + # For this reason the Path to the program has to be added at the beginning. + ROOT_VERSION_PATH = u'/.DocumentRevisions-V100/' + + def DocumentVersionsRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a document versions row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + # version_path = "PerUser/UserID/xx/client_id/version_file" + # where PerUser and UserID are a real directories. 
+ paths = row['version_path'].split(u'/')
+ if len(paths) < 2 or not paths[1].isdigit():
+ user_sid = None
+ else:
+ user_sid = paths[1]
+ version_path = self.ROOT_VERSION_PATH + row['version_path']
+ path, _, _ = row['path'].rpartition(u'/')
+
+ event_object = MacDocumentVersionsEvent(
+ row['version_time'], row['name'], path, version_path,
+ row['last_time'], user_sid)
+ parser_context.ProduceEvent(
+ event_object, query=query, parser_chain=parser_chain,
+ file_entry=file_entry)
+
+
+sqlite.SQLiteParser.RegisterPlugin(MacDocumentVersionsPlugin)
diff --git a/plaso/parsers/sqlite_plugins/mac_document_versions_test.py b/plaso/parsers/sqlite_plugins/mac_document_versions_test.py
new file mode 100644
index 0000000..0d2ed97
--- /dev/null
+++ b/plaso/parsers/sqlite_plugins/mac_document_versions_test.py
@@ -0,0 +1,74 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Mac OS X Document Versions plugin."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import mac_document_versions as mac_doc_rev_formatter
+from plaso.lib import eventdata
+from plaso.lib import timelib_test
+from plaso.parsers.sqlite_plugins import mac_document_versions
+from plaso.parsers.sqlite_plugins import test_lib
+
+
+class MacDocumentVersionsTest(test_lib.SQLitePluginTestCase):
+ """Tests for the Mac OS X Document Versions plugin."""
+
+ def setUp(self):
+ """Sets up the needed objects used throughout the test."""
+ self._plugin = mac_document_versions.MacDocumentVersionsPlugin()
+
+ def testProcess(self):
+ """Tests the Process function on a Mac OS X Document Versions file."""
+ test_file = self._GetTestFilePath(['document_versions.sql'])
+ event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+ self._plugin, test_file)
+ event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+ self.assertEquals(len(event_objects), 4)
+
+ # Check the first document version entry.
+ event_object = event_objects[0] + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2014-01-21 02:03:00') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.name, u'Spain is beautiful.rtf') + self.assertEquals(event_object.path, u'/Users/moxilo/Documents') + self.assertEquals(event_object.user_sid, u'501') + expected_version_path = ( + u'/.DocumentRevisions-V100/PerUID/501/1/' + u'com.apple.documentVersions/' + u'08CFEB5A-5CDA-486F-AED5-EA35BF3EE4C2.rtf') + self.assertEquals(event_object.version_path, expected_version_path) + + expected_msg = ( + u'Version of [{0:s}] ({1:s}) stored in {2:s} by {3:s}'.format( + event_object.name, event_object.path, + event_object.version_path, event_object.user_sid)) + expected_short = u'Stored a document version of [{0:s}]'.format( + event_object.name) + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/mackeeper_cache.py b/plaso/parsers/sqlite_plugins/mackeeper_cache.py new file mode 100644 index 0000000..ec06290 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/mackeeper_cache.py @@ -0,0 +1,229 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for the Mac OS X MacKeeper cache database.""" + +import json + +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +def DictToList(data_dict): + """Take a dict object and return a list of strings back.""" + ret_list = [] + for key, value in data_dict.iteritems(): + if key in ('body', 'datetime', 'type', 'room', 'rooms', 'id'): + continue + ret_list.append(u'{0:s} = {1!s}'.format(key, value)) + + return ret_list + + +def ExtractJQuery(jquery_raw): + """Extract and return the data inside a JQuery as a dict object.""" + data_part = u'' + if not jquery_raw: + return {} + + if '[' in jquery_raw: + _, _, first_part = jquery_raw.partition('[') + data_part, _, _ = first_part.partition(']') + elif jquery_raw.startswith('//'): + _, _, first_part = jquery_raw.partition('{') + data_part = u'{{{0:s}'.format(first_part) + elif '({' in jquery_raw: + _, _, first_part = jquery_raw.partition('(') + data_part, _, _ = first_part.rpartition(')') + + if not data_part: + return {} + + try: + data_dict = json.loads(data_part) + except ValueError: + return {} + + return data_dict + + +def ParseChatData(data): + """Parse a chat comment data dict and return a parsed one back. + + Args: + data: A dict object that is parsed from the record. + + Returns: + A dict object to store the results in. 
+ """ + data_store = {} + + if 'body' in data: + body = data.get('body', '').replace('\n', ' ') + if body.startswith('//') and '{' in body: + body_dict = ExtractJQuery(body) + title, _, _ = body.partition('{') + body = u'{0:s} <{1!s}>'.format(title[2:], DictToList(body_dict)) + else: + body = 'No text.' + + data_store['text'] = body + + room = data.get('rooms', None) + if not room: + room = data.get('room', None) + if room: + data_store['room'] = room + + data_store['id'] = data.get('id', None) + user = data.get('user', None) + if user: + try: + user_sid = int(user) + data_store['sid'] = user_sid + except (ValueError, TypeError): + data_store['user'] = user + + return data_store + + +class MacKeeperCacheEvent(event.EventObject): + """Convenience class for a MacKeeper Cache event.""" + DATA_TYPE = 'mackeeper:cache' + + def __init__(self, timestamp, description, identifier, url, data_dict): + """Initializes the event object. + + Args: + timestamp: A timestamp as a number of milliseconds since Epoch + or as a UTC string. + description: The description of the cache entry. + identifier: The row identifier. + url: The MacKeeper URL value that is stored in every event. + data_dict: A dict object with the descriptive information. + """ + super(MacKeeperCacheEvent, self).__init__() + + # Two different types of timestamps stored in log files. + if type(timestamp) in (int, long): + self.timestamp = timelib.Timestamp.FromJavaTime(timestamp) + else: + self.timestamp = timelib.Timestamp.FromTimeString(timestamp) + + self.timestamp_desc = eventdata.EventTimestamp.ADDED_TIME + self.description = description + self.offset = identifier + self.text = data_dict.get('text', None) + self.user_sid = data_dict.get('sid', None) + self.user_name = data_dict.get('user', None) + self.event_type = data_dict.get('event_type', None) + self.room = data_dict.get('room', None) + self.record_id = data_dict.get('id', None) + self.url = url + + +class MacKeeperCachePlugin(interface.SQLitePlugin): + """Plugin for the MacKeeper Cache database file.""" + + NAME = 'mackeeper_cache' + DESCRIPTION = u'Parser for MacKeeper Cache SQLite database files.' + + # Define the needed queries. + QUERIES = [(( + 'SELECT d.entry_ID AS id, d.receiver_data AS data, r.request_key, ' + 'r.time_stamp AS time_string FROM cfurl_cache_receiver_data d, ' + 'cfurl_cache_response r WHERE r.entry_ID = ' + 'd.entry_ID'), 'ParseReceiverData')] + + # The required tables. + REQUIRED_TABLES = frozenset([ + 'cfurl_cache_blob_data', 'cfurl_cache_receiver_data', + 'cfurl_cache_response']) + + def ParseReceiverData( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses a single row from the receiver and cache response table. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + data = {} + key_url = row['request_key'] + + data_dict = {} + description = 'MacKeeper Entry' + # Check the URL, since that contains vital information about the type of + # event we are dealing with. + if key_url.endswith('plist'): + description = 'Configuration Definition' + data['text'] = 'Plist content added to cache.' 
+ elif key_url.startswith('http://event.zeobit.com'): + description = 'MacKeeper Event' + try: + _, _, part = key_url.partition('?') + data['text'] = part.replace('&', ' ') + except UnicodeDecodeError: + data['text'] = 'N/A' + elif key_url.startswith('http://account.zeobit.com'): + description = 'Account Activity' + _, _, activity = key_url.partition('#') + if activity: + data['text'] = u'Action started: {0:s}'.format(activity) + else: + data['text'] = u'Unknown activity.' + elif key_url.startswith('http://support.') and 'chat' in key_url: + description = 'Chat ' + try: + jquery = unicode(row['data']) + except UnicodeDecodeError: + jquery = '' + + data_dict = ExtractJQuery(jquery) + data = ParseChatData(data_dict) + + data['entry_type'] = data_dict.get('type', '') + if data['entry_type'] == 'comment': + description += 'Comment' + elif data['entry_type'] == 'outgoing': + description += 'Outgoing Message' + elif data['entry_type'] == 'incoming': + description += 'Incoming Message' + else: + # Empty or not known entry type, generic status message. + description += 'Entry' + data['text'] = u';'.join(DictToList(data_dict)) + if not data['text']: + data['text'] = 'No additional data.' + + event_object = MacKeeperCacheEvent( + row['time_string'], description, row['id'], key_url, data) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(MacKeeperCachePlugin) diff --git a/plaso/parsers/sqlite_plugins/mackeeper_cache_test.py b/plaso/parsers/sqlite_plugins/mackeeper_cache_test.py new file mode 100644 index 0000000..a139e43 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/mackeeper_cache_test.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the MacKeeper Cache database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import mackeeper_cache as mackeeper_cache_formatter +from plaso.lib import timelib_test +from plaso.parsers.sqlite_plugins import mackeeper_cache +from plaso.parsers.sqlite_plugins import test_lib + + +class MacKeeperCachePluginTest(test_lib.SQLitePluginTestCase): + """Tests for the MacKeeper Cache database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mackeeper_cache.MacKeeperCachePlugin() + + def testProcess(self): + """Tests the Process function on a MacKeeper Cache database file.""" + test_file = self._GetTestFilePath(['mackeeper_cache.db']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The cache file contains 198 entries. 
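# The timestamps asserted in this test went through the dual-format
# normalization in MacKeeperCacheEvent above: the cache stores both integer
# Java-style milliseconds and plain time strings. A sketch of that
# normalization, assuming a 'YYYY-MM-DD HH:MM:SS' string form (plaso itself
# uses timelib.Timestamp):

import calendar
import time

def ToMicroseconds(timestamp):
  """Normalizes a MacKeeper cache timestamp to microseconds since epoch."""
  if isinstance(timestamp, (int, long)):
    # Java time: milliseconds since the POSIX epoch.
    return timestamp * 1000
  parsed = time.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
  return calendar.timegm(parsed) * 1000000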
+ self.assertEquals(len(event_objects), 198) + + event_object = event_objects[41] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-12 19:30:31') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Chat Outgoing Message : I have received your system scan report and ' + u'I will start analyzing it right now. [ URL: http://support.kromtech.' + u'net/chat/listen/12828340738351e0593f987450z40787/?client-id=51e0593f' + u'a1a24468673655&callback=jQuery183013571173651143909_1373657420912&_=' + u'1373657423647 Event ID: 16059074 Room: ' + u'12828340738351e0593f987450z40787 ]') + + expected_short = ( + u'I have received your system scan report and I will start analyzing ' + u'it right now.') + + self._TestGetMessageStrings(event_object, expected_msg, expected_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/skype.py b/plaso/parsers/sqlite_plugins/skype.py new file mode 100644 index 0000000..de7701c --- /dev/null +++ b/plaso/parsers/sqlite_plugins/skype.py @@ -0,0 +1,492 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a basic Skype SQLite parser.""" + +import logging + +from plaso.events import time_events +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +__author__ = 'Joaquin Moreno Garijo (bastionado@gmail.com)' + + +class SkypeChatEvent(time_events.PosixTimeEvent): + """Convenience class for a Skype event.""" + + DATA_TYPE = 'skype:event:chat' + + def __init__(self, row, to_account): + """Build a Skype Event from a single row. + + Args: + row: A row object (instance of sqlite3.Row) that contains the + extracted data from a single row in the database. + to_account: A string containing the accounts (excluding the + author) of the conversation. + """ + super(SkypeChatEvent, self).__init__( + row['timestamp'], 'Chat from Skype', self.DATA_TYPE) + + self.title = row['title'] + self.text = row['body_xml'] + self.from_account = u'{0:s} <{1:s}>'.format( + row['from_displayname'], row['author']) + self.to_account = to_account + + +class SkypeAccountEvent(time_events.PosixTimeEvent): + """Convenience class for account information.""" + + DATA_TYPE = 'skype:event:account' + + def __init__( + self, timestamp, usage, identifier, full_name, display_name, email, + country): + """Initialize the event. + + Args: + timestamp: The POSIX timestamp value. + usage: A string containing the description string of the timestamp. + identifier: The row identifier. + full_name: A string containing the full name of the Skype account holder. + display_name: A string containing the chosen display name of the account + holder. + email: A string containing the registered email address of the account + holder. + country: A string containing the chosen home country of the account + holder. 
+ """ + super(SkypeAccountEvent, self).__init__(timestamp, usage) + + self.offset = identifier + self.username = u'{0:s} <{1:s}>'.format(full_name, display_name) + self.display_name = display_name + self.email = email + self.country = country + self.data_type = self.DATA_TYPE + + +class SkypeSMSEvent(time_events.PosixTimeEvent): + """Convenience EventObject for SMS.""" + + DATA_TYPE = 'skype:event:sms' + + def __init__(self, row, dst_number): + """Read the information related with the SMS. + + Args: + row: row form the sql query. + row['time_sms']: timestamp when the sms was send. + row['dstnum_sms']: number which receives the sms. + row['msg_sms']: text send to this sms. + dst_number: phone number where the user send the sms. + """ + super(SkypeSMSEvent, self).__init__( + row['time_sms'], 'SMS from Skype', self.DATA_TYPE) + + self.number = dst_number + self.text = row['msg_sms'] + + +class SkypeCallEvent(time_events.PosixTimeEvent): + """Convenience EventObject for the calls.""" + + DATA_TYPE = 'skype:event:call' + + def __init__(self, timestamp, call_type, user_start_call, + source, destination, video_conference): + """Contains information if the call was cancelled, accepted or finished. + + Args: + timestamp: the timestamp of the event. + call_type: WAITING, STARTED, FINISHED. + user_start_call: boolean, true indicates that the owner + account started the call. + source: the account which started the call. + destination: the account which gets the call. + video_conference: boolean, if is true it was a videoconference. + """ + + super(SkypeCallEvent, self).__init__( + timestamp, 'Call from Skype', self.DATA_TYPE) + + self.call_type = call_type + self.user_start_call = user_start_call + self.src_call = source + self.dst_call = destination + self.video_conference = video_conference + + +class SkypeTransferFileEvent(time_events.PosixTimeEvent): + """Evaluate the action of send a file.""" + + DATA_TYPE = 'skype:event:transferfile' + + def __init__(self, row, timestamp, action_type, source, destination): + """Actions related with sending files. + + Args: + row: + filepath: path from the file. + filename: name of the file. + filesize: size of the file. + timestamp: when the action happens. + action_type: GETSOLICITUDE, SENDSOLICITUDE, ACCEPTED, FINISHED. + source: The account that sent the file. + destination: The account that received the file. + """ + + super(SkypeTransferFileEvent, self).__init__( + timestamp, 'File transfer from Skype', self.DATA_TYPE) + + self.offset = row['id'] + self.action_type = action_type + self.source = source + self.destination = destination + self.transferred_filepath = row['filepath'] + self.transferred_filename = row['filename'] + try: + self.transferred_filesize = int(row['filesize']) + except ValueError: + logging.debug(u'Unknown filesize {0:s}'.format( + self.transferred_filename)) + self.transferred_filesize = 0 + + +class SkypePlugin(interface.SQLitePlugin): + """SQLite plugin for Skype main.db SQlite database file.""" + + NAME = 'skype' + DESCRIPTION = u'Parser for Skype SQLite database files.' + + # Queries for building cache. + QUERY_DEST_FROM_TRANSFER = ( + u'SELECT parent_id, partner_handle AS skypeid, ' + u'partner_dispname AS skypename FROM transfers') + QUERY_SOURCE_FROM_TRANSFER = ( + u'SELECT pk_id, partner_handle AS skypeid, ' + u'partner_dispname AS skypename FROM transfers') + + # Define the needed queries. 
+ QUERIES = [ + (('SELECT c.id, c.participants, c.friendlyname AS title, ' + 'm.author AS author, m.from_dispname AS from_displayname, ' + 'm.body_xml, m.timestamp, c.dialog_partner FROM Chats c, Messages m ' + 'WHERE c.name = m.chatname'), 'ParseChat'), + (('SELECT id, fullname, given_displayname, emails, ' + 'country, profile_timestamp, authreq_timestamp, ' + 'lastonline_timestamp, mood_timestamp, sent_authrequest_time, ' + 'lastused_timestamp FROM Accounts'), 'ParseAccountInformation'), + (('SELECT id, target_numbers AS dstnum_sms, timestamp AS time_sms, ' + 'body AS msg_sms FROM SMSes'), 'ParseSMS'), + (('SELECT id, partner_handle, partner_dispname, offer_send_list, ' + 'starttime, accepttime, finishtime, filepath, filename, filesize, ' + 'status, parent_id, pk_id FROM Transfers'), 'ParseFileTransfer'), + (('SELECT c.id, cm.guid, c.is_incoming, ' + 'cm.call_db_id, cm.videostatus, c.begin_timestamp AS try_call, ' + 'cm.start_timestamp AS accept_call, cm.call_duration ' + 'FROM Calls c, CallMembers cm ' + 'WHERE c.id = cm.call_db_id;'), 'ParseCall')] + + # The required tables. + REQUIRED_TABLES = frozenset([ + 'Chats', 'Accounts', 'Conversations', 'Contacts', 'SMSes', 'Transfers', + 'CallMembers', 'Calls']) + + def ParseAccountInformation( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses the Accounts database. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + if row['profile_timestamp']: + event_object = SkypeAccountEvent( + row['profile_timestamp'], u'Profile Changed', row['id'], + row['fullname'], row['given_displayname'], row['emails'], + row['country']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['authreq_timestamp']: + event_object = SkypeAccountEvent( + row['authreq_timestamp'], u'Authenticate Request', row['id'], + row['fullname'], row['given_displayname'], row['emails'], + row['country']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['lastonline_timestamp']: + event_object = SkypeAccountEvent( + row['lastonline_timestamp'], u'Last Online', row['id'], + row['fullname'], row['given_displayname'], row['emails'], + row['country']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['mood_timestamp']: + event_object = SkypeAccountEvent( + row['mood_timestamp'], u'Mood Event', row['id'], + row['fullname'], row['given_displayname'], row['emails'], + row['country']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['sent_authrequest_time']: + event_object = SkypeAccountEvent( + row['sent_authrequest_time'], u'Auth Request Sent', row['id'], + row['fullname'], row['given_displayname'], row['emails'], + row['country']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['lastused_timestamp']: + event_object = SkypeAccountEvent( + row['lastused_timestamp'], u'Last Used', row['id'], + row['fullname'], row['given_displayname'], row['emails'], + 
row['country'])
+ parser_context.ProduceEvent(
+ event_object, query=query, parser_chain=parser_chain,
+ file_entry=file_entry)
+
+ def ParseChat(
+ self, parser_context, row, file_entry=None, parser_chain=None, query=None,
+ **unused_kwargs):
+ """Parses a chat message row.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ row: The row resulting from the query.
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+ The default is None.
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ query: Optional query string. The default is None.
+ """
+ to_account = ''
+ accounts = []
+ participants = row['participants'].split(' ')
+ for participant in participants:
+ if participant != row['author']:
+ accounts.append(participant)
+ to_account = u', '.join(accounts)
+
+ if not to_account:
+ if row['dialog_partner']:
+ to_account = row['dialog_partner']
+ else:
+ to_account = u'Unknown User'
+
+ event_object = SkypeChatEvent(row, to_account)
+ parser_context.ProduceEvent(
+ event_object, query=query, parser_chain=parser_chain,
+ file_entry=file_entry)
+
+ def ParseSMS(
+ self, parser_context, row, file_entry=None, parser_chain=None, query=None,
+ **unused_kwargs):
+ """Parses an SMS row.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ row: The row resulting from the query.
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+ The default is None.
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ query: Optional query string. The default is None.
+ """
+ dst_number = row['dstnum_sms'].replace(' ', '')
+
+ event_object = SkypeSMSEvent(row, dst_number)
+ parser_context.ProduceEvent(
+ event_object, query=query, parser_chain=parser_chain,
+ file_entry=file_entry)
+
+ def ParseCall(
+ self, parser_context, row, file_entry=None, parser_chain=None, query=None,
+ **unused_kwargs):
+ """Parses a call row, taking the joined CallMembers rows into account.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ row: The row resulting from the query.
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+ The default is None.
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ query: Optional query string. The default is None.
+ """ + try: + aux = row['guid'] + if aux: + aux_list = aux.split('-') + src_aux = aux_list[0] + dst_aux = aux_list[1] + else: + src_aux = u'Unknown [no GUID]' + dst_aux = u'Unknown [no GUID]' + except IndexError: + src_aux = u'Unknown [{0:s}]'.format(row['guid']) + dst_aux = u'Unknown [{0:s}]'.format(row['guid']) + + if row['is_incoming'] == '0': + user_start_call = True + source = src_aux + if row['ip_address']: + destination = u'{0:s} <{1:s}>'.format(dst_aux, row['ip_address']) + else: + destination = dst_aux + else: + user_start_call = False + source = src_aux + destination = dst_aux + + if row['videostatus'] == '3': + video_conference = True + else: + video_conference = False + + event_object = SkypeCallEvent( + row['try_call'], 'WAITING', user_start_call, source, destination, + video_conference) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['accept_call']: + event_object = SkypeCallEvent( + row['accept_call'], 'ACCEPTED', user_start_call, source, destination, + video_conference) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['call_duration']: + try: + timestamp = int(row['accept_call']) + int(row['call_duration']) + event_object = SkypeCallEvent( + timestamp, 'FINISHED', user_start_call, source, destination, + video_conference) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + except ValueError: + logging.debug(( + u'[{0:s}] Unable to determine when the call {1:s} was ' + u'finished.').format(self.NAME, row['id'])) + + def ParseFileTransfer( + self, parser_context, row, file_entry=None, parser_chain=None, cache=None, + database=None, query=None, **unused_kwargs): + """Parse the transfer files. + + There is no direct relationship between who sends the file and + who accepts the file. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: the row with all information related with the file transfers. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + cache: a cache object (instance of SQLiteCache). + database: A database object (instance of SQLiteDatabase). 
+ """ + source_dict = cache.GetResults('source') + if not source_dict: + cursor = database.cursor + results = cursor.execute(self.QUERY_SOURCE_FROM_TRANSFER) + cache.CacheQueryResults( + results, 'source', 'pk_id', ('skypeid', 'skypename')) + source_dict = cache.GetResults('source') + + dest_dict = cache.GetResults('destination') + if not dest_dict: + cursor = database.cursor + results = cursor.execute(self.QUERY_DEST_FROM_TRANSFER) + cache.CacheQueryResults( + results, 'destination', 'parent_id', ('skypeid', 'skypename')) + dest_dict = cache.GetResults('destination') + + source = u'Unknown' + destination = u'Unknown' + + if row['parent_id']: + destination = u'{0:s} <{1:s}>'.format( + row['partner_handle'], row['partner_dispname']) + skype_id, skype_name = source_dict.get(row['parent_id'], [None, None]) + if skype_name: + source = u'{0:s} <{1:s}>'.format(skype_id, skype_name) + else: + source = u'{0:s} <{1:s}>'.format( + row['partner_handle'], row['partner_dispname']) + + if row['pk_id']: + skype_id, skype_name = dest_dict.get(row['pk_id'], [None, None]) + if skype_name: + destination = u'{0:s} <{1:s}>'.format(skype_id, skype_name) + + if row['status'] == 8: + if row['starttime']: + event_object = SkypeTransferFileEvent( + row, row['starttime'], 'GETSOLICITUDE', source, destination) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['accepttime']: + event_object = SkypeTransferFileEvent( + row, row['accepttime'], 'ACCEPTED', source, destination) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + if row['finishtime']: + event_object = SkypeTransferFileEvent( + row, row['finishtime'], 'FINISHED', source, destination) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + elif row['status'] == 2 and row['starttime']: + event_object = SkypeTransferFileEvent( + row, row['starttime'], 'SENDSOLICITUDE', source, destination) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(SkypePlugin) diff --git a/plaso/parsers/sqlite_plugins/skype_test.py b/plaso/parsers/sqlite_plugins/skype_test.py new file mode 100644 index 0000000..e183748 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/skype_test.py @@ -0,0 +1,158 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Skype main.db history database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import skype as skype_formatter +from plaso.lib import timelib_test +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import skype +from plaso.parsers.sqlite_plugins import test_lib + + +class SkypePluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Skype main.db history database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = skype.SkypePlugin() + + def testProcess(self): + """Tests the Process function on a Skype History database file. + + The History file contains 24 events: + 4 call events + 4 transfers file events + 1 sms events + 15 chat events + + Events used: + id = 16 -> SMS + id = 22 -> Call + id = 18 -> File + id = 1 -> Chat + id = 14 -> ChatRoom + """ + test_file = self._GetTestFilePath(['skype_main.db']) + cache = sqlite.SQLiteCache() + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file, cache) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + calls = 0 + files = 0 + sms = 0 + chats = 0 + for event_object in event_objects: + if event_object.data_type == 'skype:event:call': + calls += 1 + if event_object.data_type == 'skype:event:transferfile': + files += 1 + if event_object.data_type == 'skype:event:sms': + sms += 1 + if event_object.data_type == 'skype:event:chat': + chats += 1 + + self.assertEquals(len(event_objects), 24) + self.assertEquals(files, 4) + self.assertEquals(sms, 1) + self.assertEquals(chats, 15) + self.assertEquals(calls, 3) + + # TODO: Split this up into separate functions for testing each type of + # event, eg: testSMS, etc. + sms_event_object = event_objects[16] + call_event_object = event_objects[22] + event_file = event_objects[18] + chat_event_object = event_objects[1] + chat_room_event_object = event_objects[14] + + # Test cache processing and format strings. + expected_msg = ( + u'Source: gen.beringer Destination: ' + u'european.bbq.competitor File: secret-project.pdf ' + u'[SENDSOLICITUDE]') + + self._TestGetMessageStrings( + event_objects[17], expected_msg, expected_msg[0:77] + '...') + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-01 22:14:22') + self.assertEquals(sms_event_object.timestamp, expected_timestamp) + text_sms = (u'If you want I can copy ' + u'some documents for you, ' + u'if you can pay it... 
;)') + self.assertEquals(sms_event_object.text, text_sms) + number = u'+34123456789' + self.assertEquals(sms_event_object.number, number) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-10-24 21:49:35') + self.assertEquals(event_file.timestamp, expected_timestamp) + + action_type = u'GETSOLICITUDE' + self.assertEquals(event_file.action_type, action_type) + source = u'gen.beringer ' + self.assertEquals(event_file.source, source) + destination = u'european.bbq.competitor ' + self.assertEquals(event_file.destination, destination) + transferred_filename = u'secret-project.pdf' + self.assertEquals(event_file.transferred_filename, transferred_filename) + filepath = u'/Users/gberinger/Desktop/secret-project.pdf' + self.assertEquals(event_file.transferred_filepath, filepath) + self.assertEquals(event_file.transferred_filesize, 69986) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-30 21:27:11') + self.assertEquals(chat_event_object.timestamp, expected_timestamp) + + title = u'European Competitor | need to know if you got it..' + self.assertEquals(chat_event_object.title, title) + expected_msg = u'need to know if you got it this time.' + self.assertEquals(chat_event_object.text, expected_msg) + from_account = u'Gen Beringer ' + self.assertEquals(chat_event_object.from_account, from_account) + self.assertEquals(chat_event_object.to_account, u'european.bbq.competitor') + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-10-27 15:29:19') + self.assertEquals(chat_room_event_object.timestamp, expected_timestamp) + + title = u'European Competitor, Echo123' + self.assertEquals(chat_room_event_object.title, title) + expected_msg = u'He is our new employee' + self.assertEquals(chat_room_event_object.text, expected_msg) + from_account = u'European Competitor ' + self.assertEquals(chat_room_event_object.from_account, from_account) + to_account = u'gen.beringer, echo123' + self.assertEquals(chat_room_event_object.to_account, to_account) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-01 22:12:17') + self.assertEquals(call_event_object.timestamp, expected_timestamp) + + self.assertEquals(call_event_object.dst_call, u'european.bbq.competitor') + self.assertEquals(call_event_object.src_call, u'gen.beringer') + self.assertEquals(call_event_object.user_start_call, False) + self.assertEquals(call_event_object.video_conference, False) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/sqlite_plugins/test_lib.py b/plaso/parsers/sqlite_plugins/test_lib.py new file mode 100644 index 0000000..2720942 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/test_lib.py @@ -0,0 +1,63 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
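+# A typical plugin test built on top of this test case class looks roughly
+# like the following sketch (compare skype_test.py above; the plugin and
+# test file names are illustrative):
+#
+#   cache = sqlite.SQLiteCache()
+#   event_queue_consumer = self._ParseDatabaseFileWithPlugin(
+#       self._plugin, test_file, cache)
+#   event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)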
+"""SQLite database plugin related functions and classes for testing.""" + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.engine import single_process +from plaso.parsers import sqlite +from plaso.parsers import test_lib + + +class SQLitePluginTestCase(test_lib.ParserTestCase): + """The unit test case for SQLite database plugins.""" + + def _ParseDatabaseFileWithPlugin( + self, plugin_object, path, cache=None, knowledge_base_values=None): + """Parses a file as a SQLite database with a specific plugin. + + Args: + plugin_object: The plugin object that is used to extract an event + generator. + path: The path to the SQLite database file. + cache: A cache object (instance of SQLiteCache). + knowledge_base_values: optional dict containing the knowledge base + values. The default is None. + + Returns: + An event object queue consumer object (instance of + TestEventObjectQueueConsumer). + """ + event_queue = single_process.SingleProcessQueue() + event_queue_consumer = test_lib.TestEventObjectQueueConsumer(event_queue) + + parse_error_queue = single_process.SingleProcessQueue() + + parser_context = self._GetParserContext( + event_queue, parse_error_queue, + knowledge_base_values=knowledge_base_values) + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=path) + file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec) + + with sqlite.SQLiteDatabase(file_entry) as database: + plugin_object.Process(parser_context, cache=cache, database=database) + + return event_queue_consumer diff --git a/plaso/parsers/sqlite_plugins/zeitgeist.py b/plaso/parsers/sqlite_plugins/zeitgeist.py new file mode 100644 index 0000000..2ec998d --- /dev/null +++ b/plaso/parsers/sqlite_plugins/zeitgeist.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plugin for the Zeitgeist SQLite database. + + Zeitgeist is a service which logs the user activities and events, anywhere + from files opened to websites visited and conversations. +""" + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.parsers import sqlite +from plaso.parsers.sqlite_plugins import interface + + +class ZeitgeistEvent(time_events.JavaTimeEvent): + """Convenience class for a Zeitgeist event.""" + + DATA_TYPE = 'zeitgeist:activity' + + def __init__(self, java_time, row_id, subject_uri): + """Initializes the event object. + + Args: + java_time: The Java time value. + row_id: The identifier of the corresponding row. + subject_uri: The Zeitgeist event. 
+ """ + super(ZeitgeistEvent, self).__init__( + java_time, eventdata.EventTimestamp.UNKNOWN) + + self.offset = row_id + self.subject_uri = subject_uri + + +class ZeitgeistPlugin(interface.SQLitePlugin): + """SQLite plugin for Zeitgeist activity database.""" + + NAME = 'zeitgeist' + DESCRIPTION = u'Parser for Zeitgeist activity SQLite database files.' + + # TODO: Explore the database more and make this parser cover new findings. + + QUERIES = [ + ('SELECT id, timestamp, subj_uri FROM event_view', + 'ParseZeitgeistEventRow')] + + REQUIRED_TABLES = frozenset(['event', 'actor']) + + def ParseZeitgeistEventRow( + self, parser_context, row, file_entry=None, parser_chain=None, query=None, + **unused_kwargs): + """Parses zeitgeist event row. + + Args: + parser_context: A parser context object (instance of ParserContext). + row: The row resulting from the query. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + query: Optional query string. The default is None. + """ + event_object = ZeitgeistEvent(row['timestamp'], row['id'], row['subj_uri']) + parser_context.ProduceEvent( + event_object, query=query, parser_chain=parser_chain, + file_entry=file_entry) + + +sqlite.SQLiteParser.RegisterPlugin(ZeitgeistPlugin) diff --git a/plaso/parsers/sqlite_plugins/zeitgeist_test.py b/plaso/parsers/sqlite_plugins/zeitgeist_test.py new file mode 100644 index 0000000..50fc454 --- /dev/null +++ b/plaso/parsers/sqlite_plugins/zeitgeist_test.py @@ -0,0 +1,61 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Zeitgeist activity database plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import zeitgeist as zeitgeist_formatter +from plaso.lib import timelib_test +from plaso.parsers.sqlite_plugins import test_lib +from plaso.parsers.sqlite_plugins import zeitgeist + + +class ZeitgeistPluginTest(test_lib.SQLitePluginTestCase): + """Tests for the Zeitgeist activity database plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = zeitgeist.ZeitgeistPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['activity.sqlite']) + event_queue_consumer = self._ParseDatabaseFileWithPlugin( + self._plugin, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # The sqlite database contains 44 events. + self.assertEquals(len(event_objects), 44) + + # Check the first event. 
+    event_object = event_objects[0]
+
+    expected_subject_uri = u'application://rhythmbox.desktop'
+    self.assertEquals(event_object.subject_uri, expected_subject_uri)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2013-10-22 08:53:19.477')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_msg = u'application://rhythmbox.desktop'
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/sqlite_test.py b/plaso/parsers/sqlite_test.py
new file mode 100644
index 0000000..3150618
--- /dev/null
+++ b/plaso/parsers/sqlite_test.py
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the SQLite database parser."""
+
+import unittest
+
+from plaso.parsers import sqlite
+# Register plugins.
+from plaso.parsers import sqlite_plugins  # pylint: disable=unused-import
+
+
+class SQLiteParserTest(unittest.TestCase):
+  """Tests for the SQLite database parser."""
+
+  def testGetPluginNames(self):
+    """Tests the GetPluginNames function."""
+    all_plugin_names = sqlite.SQLiteParser.GetPluginNames()
+
+    self.assertNotEquals(all_plugin_names, [])
+
+    self.assertTrue('skype' in all_plugin_names)
+    self.assertTrue('chrome_history' in all_plugin_names)
+    self.assertTrue('firefox_history' in all_plugin_names)
+
+    # Test filtering the list of plugins with a parser filter string.
+    parser_filter_string = 'chrome_history, firefox_history, -skype'
+    plugin_names = sqlite.SQLiteParser.GetPluginNames(
+        parser_filter_string=parser_filter_string)
+
+    self.assertEquals(len(plugin_names), 2)
+    self.assertFalse('skype' in plugin_names)
+    self.assertTrue('chrome_history' in plugin_names)
+    self.assertTrue('firefox_history' in plugin_names)
+
+    # Test with a different plugin selection.
+    parser_filter_string = 'sqlite, -skype'
+    plugin_names = sqlite.SQLiteParser.GetPluginNames(
+        parser_filter_string=parser_filter_string)
+
+    # This should result in all plugins EXCEPT the skype one.
+    self.assertEquals(len(plugin_names), len(all_plugin_names) - 1)
+    self.assertFalse('skype' in plugin_names)
+    self.assertTrue('chrome_history' in plugin_names)
+    self.assertTrue('firefox_history' in plugin_names)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/symantec.py b/plaso/parsers/symantec.py
new file mode 100644
index 0000000..21cffa6
--- /dev/null
+++ b/plaso/parsers/symantec.py
@@ -0,0 +1,153 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a Symantec parser in plaso."""
+
+from plaso.events import text_events
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+import pytz
+
+
+__author__ = 'David Nides (david.nides@gmail.com)'
+
+
+class SymantecEvent(text_events.TextEvent):
+  """Convenience class for a Symantec line event."""
+  DATA_TYPE = 'av:symantec:scanlog'
+
+
+class SymantecParser(text_parser.TextCSVParser):
+  """Parse Symantec AV Corporate Edition and Endpoint Protection log files."""
+
+  NAME = 'symantec_scanlog'
+  DESCRIPTION = u'Parser for Symantec Anti-Virus log files.'
+
+  # Define the columns that make up the structure of a Symantec log file.
+  # http://www.symantec.com/docs/TECH100099
+  COLUMNS = [
+      'time', 'event', 'cat', 'logger', 'computer', 'user',
+      'virus', 'file', 'action1', 'action2', 'action0', 'virustype',
+      'flags', 'description', 'scanid', 'new_ext', 'groupid',
+      'event_data', 'vbin_id', 'virus_id', 'quarfwd_status',
+      'access', 'snd_status', 'compressed', 'depth', 'still_infected',
+      'definfo', 'defseqnumber', 'cleaninfo', 'deleteinfo',
+      'backup_id', 'parent', 'guid', 'clientgroup', 'address',
+      'domainname', 'ntdomain', 'macaddr', 'version:',
+      'remote_machine', 'remote_machine_ip', 'action1_status',
+      'action2_status', 'license_feature_name', 'license_feature_ver',
+      'license_serial_num', 'license_fulfillment_id', 'license_start_dt',
+      'license_expiration_dt', 'license_lifecycle', 'license_seats_total',
+      'license_seats', 'err_code', 'license_seats_delta', 'status',
+      'domain_guid', 'log_session_guid', 'vbin_session_id',
+      'login_domain', 'extra']
+
+  def _GetTimestamp(self, timestamp_raw, timezone=pytz.utc):
+    """Return a 64-bit signed timestamp value in microseconds since Epoch.
+
+    The timestamp consists of six hexadecimal octets.
+    They represent the following:
+      First octet: Number of years since 1970
+      Second octet: Month, where January = 0
+      Third octet: Day
+      Fourth octet: Hour
+      Fifth octet: Minute
+      Sixth octet: Second
+
+    For example, 200A13080122 represents November 19, 2002, 8:01:34 AM.
+
+    Args:
+      timestamp_raw: The hexadecimal encoded timestamp value.
+      timezone: Optional timezone (instance of pytz.timezone).
+                The default is UTC.
+
+    Returns:
+      A plaso timestamp value, microseconds since Epoch in UTC.
+    """
+    if timestamp_raw == '':
+      return 0
+
+    # Pair the hexadecimal digits two at a time; each pair is one octet.
+    year, month, day, hours, minutes, seconds = (
+        int(x[0] + x[1], 16) for x in zip(
+            timestamp_raw[::2], timestamp_raw[1::2]))
+
+    return timelib.Timestamp.FromTimeParts(
+        year + 1970, month + 1, day, hours, minutes, seconds,
+        timezone=timezone)
+
+  def VerifyRow(self, parser_context, row):
+    """Verify a single line of a Symantec log file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: A single row from the CSV file.
+
+    Returns:
+      True if this is the correct parser, False otherwise.
+    """
+    try:
+      timestamp = self._GetTimestamp(row['time'], parser_context.timezone)
+    except (TypeError, ValueError):
+      return False
+
+    if not timestamp:
+      return False
+
+    # Check a few of the remaining entries.
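+    # The bounds below mirror the event and category identifier ranges this
+    # parser accepts (cf. the Symantec TECH100099 reference above); values
+    # outside these ranges suggest that this is not a Symantec log file.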
+    try:
+      my_event = int(row['event'])
+    except (TypeError, ValueError):
+      # Note: a ValueError is raised when the value is not numeric.
+      return False
+
+    if my_event < 1 or my_event > 77:
+      return False
+
+    try:
+      category = int(row['cat'])
+    except (TypeError, ValueError):
+      return False
+
+    if category < 1 or category > 4:
+      return False
+
+    return True
+
+  def ParseRow(
+      self, parser_context, row_offset, row, file_entry=None,
+      parser_chain=None):
+    """Parses a row and extracts event objects.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row_offset: The offset of the row.
+      row: A dictionary containing all the fields as denoted in the
+           COLUMNS class list.
+      file_entry: optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    timestamp = self._GetTimestamp(row['time'], parser_context.timezone)
+
+    # TODO: Create new dict object that only contains valuable attributes.
+    event_object = SymantecEvent(timestamp, row_offset, row)
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+manager.ParsersManager.RegisterParser(SymantecParser)
diff --git a/plaso/parsers/symantec_test.py b/plaso/parsers/symantec_test.py
new file mode 100644
index 0000000..aef99f1
--- /dev/null
+++ b/plaso/parsers/symantec_test.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Symantec AV Log parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import symantec as symantec_formatter
+from plaso.lib import timelib_test
+from plaso.parsers import symantec
+from plaso.parsers import test_lib
+
+import pytz
+
+
+class SymantecAccessProtectionUnitTest(test_lib.ParserTestCase):
+  """Tests for the Symantec AV Log parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = symantec.SymantecParser()
+
+  def testGetTimestamp(self):
+    """Tests the _GetTimestamp function."""
+    # pylint: disable=protected-access
+    timestamp = self._parser._GetTimestamp('200A13080122', timezone=pytz.UTC)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2002-11-19 08:01:34')
+    self.assertEquals(timestamp, expected_timestamp)
+
+    timestamp = self._parser._GetTimestamp('2A0A1E0A2F1D', timezone=pytz.UTC)
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2012-11-30 10:47:29')
+    self.assertEquals(timestamp, expected_timestamp)
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath(['Symantec.Log'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    # The file contains 8 lines which should result in 8 event objects.
+    self.assertEquals(len(event_objects), 8)
+
+    # Test the second entry:
+    event_object = event_objects[1]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2012-11-30 10:47:29')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+    self.assertEquals(event_object.user, u'davnads')
+    expected_file = (
+        u'D:\\Twinkle_Prod$\\VM11 XXX\\outside\\test.exe.txt')
+    self.assertEquals(event_object.file, expected_file)
+
+    expected_msg = (
+        u'Event Name: GL_EVENT_INFECTION; '
+        u'Category Name: GL_CAT_INFECTION; '
+        u'Malware Name: W32.Changeup!gen33; '
+        u'Malware Path: '
+        u'D:\\Twinkle_Prod$\\VM11 XXX\\outside\\test.exe.txt; '
+        u'Action0: Unknown; '
+        u'Action1: Clean virus from file; '
+        u'Action2: Delete infected file; '
+        u'Scan ID: 0; '
+        u'Event Data: 201\t4\t6\t1\t65542\t0\t0\t0\t0\t0\t0')
+    expected_msg_short = (
+        u'D:\\Twinkle_Prod$\\VM11 XXX\\outside\\test.exe.txt; '
+        u'W32.Changeup!gen33; '
+        u'Unknown; ...')
+
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/syslog.py b/plaso/parsers/syslog.py
new file mode 100644
index 0000000..cf0c9df
--- /dev/null
+++ b/plaso/parsers/syslog.py
@@ -0,0 +1,209 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a syslog parser in plaso."""
+
+import datetime
+import logging
+
+from plaso.events import text_events
+from plaso.lib import lexer
+from plaso.lib import timelib
+from plaso.lib import utils
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+class SyslogLineEvent(text_events.TextEvent):
+  """Convenience class for a syslog line event."""
+  DATA_TYPE = 'syslog:line'
+
+
+class SyslogParser(text_parser.SlowLexicalTextParser):
+  """Parse text based syslog files."""
+
+  NAME = 'syslog'
+  DESCRIPTION = u'Parser for syslog files.'
+
+  # TODO: can we change this, similar to SQLite, so that an event specific
+  # object is created for different lines using a callback function?
+  # Define the tokens that make up the structure of a syslog file.
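+  # Each lexer.Token below is defined as (state, regular expression,
+  # callback, next state). As a sketch of the intended state flow, a line
+  # such as the one used in the tests:
+  #
+  #   Jan 22 07:52:33 myhostname.myhost.com client[30840]: INFO No new content.
+  #
+  # is consumed as: INITIAL matches the month name, DAY the day of the month,
+  # TIME the time of day, STRING_HOST the hostname, STRING_PID the reporter
+  # and PID ("client[30840]") and STRING accumulates the message body until
+  # the trailing newline triggers ParseMessage.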
+  tokens = [
+      lexer.Token('INITIAL',
+                  '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ',
+                  'SetMonth', 'DAY'),
+      lexer.Token('DAY', r'\s?(\d{1,2})\s+', 'SetDay', 'TIME'),
+      lexer.Token('TIME', r'([0-9:\.]+) ', 'SetTime', 'STRING_HOST'),
+      lexer.Token('STRING_HOST', r'^--(-)', 'ParseHostname', 'STRING'),
+      lexer.Token('STRING_HOST', r'([^\s]+) ', 'ParseHostname', 'STRING_PID'),
+      lexer.Token('STRING_PID', r'([^\:\n]+)', 'ParsePid', 'STRING'),
+      lexer.Token('STRING', r'([^\n]+)', 'ParseString', ''),
+      lexer.Token('STRING', r'\n\t', None, ''),
+      lexer.Token('STRING', r'\t', None, ''),
+      lexer.Token('STRING', r'\n', 'ParseMessage', 'INITIAL'),
+      lexer.Token('.', '([^\n]+)\n', 'ParseIncomplete', 'INITIAL'),
+      lexer.Token('.', '\n[^\t]', 'ParseIncomplete', 'INITIAL'),
+      lexer.Token('S[.]+', '(.+)', 'ParseString', ''),
+  ]
+
+  def __init__(self):
+    """Initializes a syslog parser object."""
+    super(SyslogParser, self).__init__(local_zone=True)
+    # Set the initial year to 0 (fixed in the actual Parse method).
+    self._year_use = 0
+    self._last_month = 0
+
+    # Set some additional attributes.
+    self.attributes['reporter'] = ''
+    self.attributes['pid'] = ''
+
+  def _GetYear(self, stat, timezone):
+    """Retrieves the year either from the input file or from the settings."""
+    time = getattr(stat, 'crtime', 0)
+    if not time:
+      time = getattr(stat, 'ctime', 0)
+
+    if not time:
+      current_year = timelib.GetCurrentYear()
+      logging.error((
+          u'Unable to determine year of syslog file.\nDefaulting to: '
+          u'{0:d}').format(current_year))
+      return current_year
+
+    try:
+      timestamp = datetime.datetime.fromtimestamp(time, timezone)
+    except ValueError as exception:
+      current_year = timelib.GetCurrentYear()
+      logging.error(
+          u'Unable to determine year of syslog file with error: {0:s}\n'
+          u'Defaulting to: {1:d}'.format(exception, current_year))
+      return current_year
+
+    return timestamp.year
+
+  def ParseLine(self, parser_context):
+    """Parse a single line from the syslog file.
+
+    This method extends the one from TextParser slightly, adding
+    the context of the reporter and pid values found inside syslog
+    files.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+
+    Returns:
+      An event object (instance of TextEvent).
+    """
+    # Note: this is an older comment that applied to a similar approach
+    # previously used in the init function.
+    # TODO: this is a HACK to get the tests working, let's discuss this.
+    if not self._year_use:
+      self._year_use = parser_context.year
+
+    if not self._year_use:
+      # TODO: Find a decent way to actually calculate the correct year
+      # from the syslog file, instead of relying on stats object.
+      stat = self.file_entry.GetStat()
+      self._year_use = self._GetYear(stat, parser_context.timezone)
+
+      if not self._year_use:
+        # TODO: Make this sensible, not have the year permanent.
+        self._year_use = 2012
+
+    month_compare = int(self.attributes['imonth'])
+    if month_compare and self._last_month > month_compare:
+      self._year_use += 1
+
+    self._last_month = int(self.attributes['imonth'])
+
+    self.attributes['iyear'] = self._year_use
+
+    return super(SyslogParser, self).ParseLine(parser_context)
+
+  def ParseHostname(self, match=None, **unused_kwargs):
+    """Parses the hostname.
+
+    This is a callback function for the text parser (lexer) and is
+    called by the STRING_HOST lexer state.
+
+    Args:
+      match: The regular expression match object.
+    """
+    self.attributes['hostname'] = match.group(1)
+
+  def ParsePid(self, match=None, **unused_kwargs):
+    """Parses the process identifier (PID).
+ + This is a callback function for the text parser (lexer) and is + called by the STRING_PID lexer state. + + Args: + match: The regular expression match object. + """ + # TODO: Change this logic and rather add more Tokens that + # fully cover all variations of the various PID stages. + line = match.group(1) + if line[-1] == ']': + splits = line.split('[') + if len(splits) == 2: + self.attributes['reporter'], pid = splits + else: + pid = splits[-1] + self.attributes['reporter'] = '['.join(splits[:-1]) + try: + self.attributes['pid'] = int(pid[:-1]) + except ValueError: + self.attributes['pid'] = 0 + else: + self.attributes['reporter'] = line + + def ParseString(self, match=None, **unused_kwargs): + """Parses a (body text) string. + + This is a callback function for the text parser (lexer) and is + called by the STRING lexer state. + + Args: + match: The regular expression match object. + """ + self.attributes['body'] += utils.GetUnicodeString(match.group(1)) + + def PrintLine(self): + """Prints a log line.""" + self.attributes['iyear'] = 2012 + return super(SyslogParser, self).PrintLine() + + # TODO: this is a rough initial implementation to get this working. + def CreateEvent(self, timestamp, offset, attributes): + """Creates a syslog line event. + + This overrides the default function in TextParser to create + syslog line events instead of text events. + + Args: + timestamp: The timestamp time value. The timestamp contains the + number of microseconds since Jan 1, 1970 00:00:00 UTC. + offset: The offset of the event. + attributes: A dict that contains the events attributes. + + Returns: + A text event (SyslogLineEvent). + """ + return SyslogLineEvent(timestamp, offset, attributes) + + +manager.ParsersManager.RegisterParser(SyslogParser) diff --git a/plaso/parsers/syslog_test.py b/plaso/parsers/syslog_test.py new file mode 100644 index 0000000..dd6f884 --- /dev/null +++ b/plaso/parsers/syslog_test.py @@ -0,0 +1,76 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
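+# Syslog lines do not carry a year; SyslogParser.ParseLine above takes the
+# year from the knowledge base 'year' value passed in below and, as tested
+# at the end of this test case, increments it when the month value rolls
+# backwards.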
+"""Tests for the syslog parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import syslog as syslog_formatter +from plaso.lib import timelib_test +from plaso.parsers import syslog +from plaso.parsers import test_lib + + +class SyslogUnitTest(test_lib.ParserTestCase): + """Tests for the syslog parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = syslog.SyslogParser() + + def testParse(self): + """Tests the Parse function.""" + knowledge_base_values = {'year': 2012} + test_file = self._GetTestFilePath(['syslog']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 13) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-01-22 07:52:33') + self.assertEquals(event_objects[0].timestamp, expected_timestamp) + self.assertEquals(event_objects[0].hostname, 'myhostname.myhost.com') + + expected_string = ( + u'[client, pid: 30840] : INFO No new content.') + self._TestGetMessageStrings( + event_objects[0], expected_string, expected_string) + + expected_msg = ( + '[aprocess, pid: 101001] : This is a multi-line message that screws up' + 'many syslog parsers.') + expected_msg_short = ( + '[aprocess, pid: 101001] : This is a multi-line message that screws up' + 'many sys...') + self._TestGetMessageStrings( + event_objects[11], expected_msg, expected_msg_short) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-02-29 01:15:43') + self.assertEquals(event_objects[6].timestamp, expected_timestamp) + + # Testing year increment. + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-03-23 23:01:18') + self.assertEquals(event_objects[8].timestamp, expected_timestamp) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/test_lib.py b/plaso/parsers/test_lib.py new file mode 100644 index 0000000..1fc7f9d --- /dev/null +++ b/plaso/parsers/test_lib.py @@ -0,0 +1,234 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser related functions and classes for testing.""" + +import os +import unittest + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.artifacts import knowledge_base +from plaso.engine import queue +from plaso.engine import single_process +from plaso.formatters import manager as formatters_manager +from plaso.lib import event +from plaso.parsers import context + + +class TestEventObjectQueueConsumer(queue.EventObjectQueueConsumer): + """Class that implements a list event object queue consumer.""" + + def __init__(self, event_queue): + """Initializes the list event object queue consumer. 
+ + Args: + event_queue: the event object queue (instance of Queue). + """ + super(TestEventObjectQueueConsumer, self).__init__(event_queue) + self.event_objects = [] + + def _ConsumeEventObject(self, event_object, **unused_kwargs): + """Consumes an event object callback for ConsumeEventObjects.""" + self.event_objects.append(event_object) + + +class ParserTestCase(unittest.TestCase): + """The unit test case for a parser.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + maxDiff = None + + def _GetEventObjects(self, event_generator): + """Retrieves the event objects from the event generator. + + This function will extract event objects from a generator. + + Args: + event_generator: the event generator as returned by the parser. + + Returns: + A list of event objects (instances of EventObject). + """ + event_objects = [] + + for event_object in event_generator: + self.assertIsInstance(event_object, event.EventObject) + event_objects.append(event_object) + + return event_objects + + def _GetEventObjectsFromQueue(self, event_queue_consumer): + """Retrieves the event objects from the queue consumer. + + Args: + event_queue_consumer: the event object queue consumer object (instance of + TestEventObjectQueueConsumer). + + Returns: + A list of event objects (instances of EventObject). + """ + event_queue_consumer.ConsumeEventObjects() + + event_objects = [] + for event_object in event_queue_consumer.event_objects: + self.assertIsInstance(event_object, event.EventObject) + event_objects.append(event_object) + + return event_objects + + def _GetParserContext( + self, event_queue, parse_error_queue, knowledge_base_values=None): + """Retrieves a parser context object. + + Args: + event_queue: the event queue (instance of Queue). + parse_error_queue: the parse error queue (instance of Queue). + knowledge_base_values: optional dict containing the knowledge base + values. The default is None. + + Returns: + A parser context object (instance of ParserContext). + """ + event_queue_producer = queue.ItemQueueProducer(event_queue) + parse_error_queue_producer = queue.ItemQueueProducer(parse_error_queue) + + knowledge_base_object = knowledge_base.KnowledgeBase() + if knowledge_base_values: + for identifier, value in knowledge_base_values.iteritems(): + knowledge_base_object.SetValue(identifier, value) + + return context.ParserContext( + event_queue_producer, parse_error_queue_producer, + knowledge_base_object) + + def _GetTestFilePath(self, path_segments): + """Retrieves the path of a test file relative to the test data directory. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A path of the test file. + """ + # Note that we need to pass the individual path segments to os.path.join + # and not a list. + return os.path.join(self._TEST_DATA_PATH, *path_segments) + + def _GetTestFileEntryFromPath(self, path_segments): + """Creates a dfVFS file_entry that references a file in the test dir. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A dfVFS file_entry object. + """ + path = self._GetTestFilePath(path_segments) + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=path) + file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec) + return file_entry + + + def _ParseFile(self, parser_object, path, knowledge_base_values=None): + """Parses a file using the parser object. 
+ + Args: + parser_object: the parser object. + path: the path of the file to parse. + knowledge_base_values: optional dict containing the knowledge base + values. The default is None. + + Returns: + An event object queue consumer object (instance of + TestEventObjectQueueConsumer). + """ + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=path) + return self._ParseFileByPathSpec( + parser_object, path_spec, knowledge_base_values=knowledge_base_values) + + def _ParseFileByPathSpec( + self, parser_object, path_spec, knowledge_base_values=None): + """Parses a file using the parser object. + + Args: + parser_object: the parser object. + path_spec: the path specification of the file to parse. + knowledge_base_values: optional dict containing the knowledge base + values. The default is None. + + Returns: + An event object queue consumer object (instance of + TestEventObjectQueueConsumer). + """ + event_queue = single_process.SingleProcessQueue() + event_queue_consumer = TestEventObjectQueueConsumer(event_queue) + + parse_error_queue = single_process.SingleProcessQueue() + + parser_context = self._GetParserContext( + event_queue, parse_error_queue, + knowledge_base_values=knowledge_base_values) + file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec) + parser_object.Parse(parser_context, file_entry) + + return event_queue_consumer + + def _TestGetMessageStrings( + self, event_object, expected_message, expected_message_short): + """Tests the formatting of the message strings. + + This function invokes the GetMessageStrings function of the event + formatter on the event object and compares the resulting messages + strings with those expected. + + Args: + event_object: the event object (instance of EventObject). + expected_message: the expected message string. + expected_message_short: the expected short message string. + """ + manager_object = formatters_manager.EventFormatterManager + message, message_short = manager_object.GetMessageStrings(event_object) + self.assertEquals(message, expected_message) + self.assertEquals(message_short, expected_message_short) + + def _TestGetSourceStrings( + self, event_object, expected_source, expected_source_short): + """Tests the formatting of the source strings. + + This function invokes the GetSourceStrings function of the event + formatter on the event object and compares the resulting source + strings with those expected. + + Args: + event_object: the event object (instance of EventObject). + expected_source: the expected source string. + expected_source_short: the expected short source string. + """ + manager_object = formatters_manager.EventFormatterManager + # TODO: change this to return the long variant first so it is consistent + # with GetMessageStrings. + source_short, source = manager_object.GetSourceStrings(event_object) + self.assertEquals(source, expected_source) + self.assertEquals(source_short, expected_source_short) diff --git a/plaso/parsers/text_parser.py b/plaso/parsers/text_parser.py new file mode 100644 index 0000000..3f1ee70 --- /dev/null +++ b/plaso/parsers/text_parser.py @@ -0,0 +1,1099 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains classes that provide a parsing framework to plaso.
+
+It contains a base framework class for parsing file objects, and
+also some implementations that extend it to provide more comprehensive
+parsers.
+"""
+
+import abc
+import csv
+import logging
+import os
+
+from dfvfs.helpers import text_file
+import pyparsing
+
+from plaso.events import text_events
+from plaso.lib import errors
+from plaso.lib import event
+from plaso.lib import lexer
+from plaso.lib import timelib
+from plaso.lib import utils
+from plaso.parsers import interface
+
+import pytz
+
+# Pylint complains about some functions not being implemented that shouldn't
+# be, since they need to be implemented by children.
+# pylint: disable=abstract-method
+
+
+class SlowLexicalTextParser(interface.BaseParser, lexer.SelfFeederMixIn):
+  """Generic text based parser that uses lexer to assist with parsing.
+
+  This text parser is based on a rather slow lexer, which makes the
+  use of this interface highly discouraged. Parsers that already
+  implement it will most likely all be rewritten to support faster
+  text parsing implementations.
+
+  This text based parser needs to be extended to provide an accurate
+  list of tokens that define the structure of the log file that the
+  parser is designed for.
+  """
+
+  # Define the max number of lines before we determine this is
+  # not the correct parser.
+  MAX_LINES = 15
+
+  # List of tokens that describe the structure of the log file.
+  tokens = [
+      lexer.Token('INITIAL', '(.+)\n', 'ParseString', ''),
+  ]
+
+  def __init__(self, local_zone=True):
+    """Constructor for the SlowLexicalTextParser.
+
+    Args:
+      local_zone: A boolean value that determines if the entries
+                  in the log file are stored in the local time
+                  zone of the computer that stored it or in a fixed
+                  timezone, like UTC.
+    """
+    # TODO: remove the multiple inheritance.
+    lexer.SelfFeederMixIn.__init__(self)
+    interface.BaseParser.__init__(self)
+    self.line_ready = False
+    self.attributes = {
+        'body': '',
+        'iyear': 0,
+        'imonth': 0,
+        'iday': 0,
+        'time': '',
+        'hostname': '',
+        'username': '',
+    }
+    self.local_zone = local_zone
+    self.file_entry = None
+
+  def ClearValues(self):
+    """Clears all the values inside the attributes dict.
+
+    All values that start with the letter 'i' are considered
+    to be integers, otherwise a string value is assumed.
+    """
+    self.line_ready = False
+    for attr in self.attributes:
+      if attr[0] == 'i':
+        self.attributes[attr] = 0
+      else:
+        self.attributes[attr] = ''
+
+  def ParseIncomplete(self, match=None, **unused_kwargs):
+    """Indication that we've got a partial line to match against.
+
+    Args:
+      match: The regular expression match object.
+    """
+    self.attributes['body'] += match.group(0)
+    self.line_ready = True
+
+  def ParseMessage(self, **unused_kwargs):
+    """Signal that a line is ready to be parsed."""
+    self.line_ready = True
+
+  def SetMonth(self, match=None, **unused_kwargs):
+    """Parses the month.
+
+    This is a callback function for the text parser (lexer) and is
+    called by the corresponding lexer state.
+
+    Args:
+      match: The regular expression match object.
+    """
+    self.attributes['imonth'] = int(
+        timelib.MONTH_DICT.get(match.group(1).lower(), 1))
+
+  def SetDay(self, match=None, **unused_kwargs):
+    """Parses the day of the month.
+
+    This is a callback function for the text parser (lexer) and is
+    called by the corresponding lexer state.
+
+    Args:
+      match: The regular expression match object.
+    """
+    self.attributes['iday'] = int(match.group(1))
+
+  def SetTime(self, match=None, **unused_kwargs):
+    """Sets the time attribute.
+
+    Args:
+      match: The regular expression match object.
+    """
+    self.attributes['time'] = match.group(1)
+
+  def SetYear(self, match=None, **unused_kwargs):
+    """Parses the year.
+
+    This is a callback function for the text parser (lexer) and is
+    called by the corresponding lexer state.
+
+    Args:
+      match: The regular expression match object.
+    """
+    self.attributes['iyear'] = int(match.group(1))
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract data from a text file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Raises:
+      UnableToParseFile: when the file cannot be parsed.
+    """
+    path_spec_printable = u'{0:s}:{1:s}'.format(
+        file_entry.path_spec.type_indicator, file_entry.name)
+    file_object = file_entry.GetFileObject()
+
+    self.file_entry = file_entry
+    # TODO: this is necessary since we inherit from lexer.SelfFeederMixIn.
+    self.file_object = file_object
+
+    # Before we proceed any further, check whether this is a text file or not.
+    file_object.seek(0, os.SEEK_SET)
+    if not utils.IsText(file_object.read(40)):
+      raise errors.UnableToParseFile(u'Not a text file, unable to proceed.')
+
+    file_object.seek(0, os.SEEK_SET)
+
+    error_count = 0
+    file_verified = False
+    # We need to clear out a few values in the lexer before continuing.
+    # There might be some leftovers from a previous run.
+    self.error = 0
+    self.buffer = ''
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    while True:
+      _ = self.NextToken()
+
+      if self.state == 'INITIAL':
+        self.entry_offset = getattr(self, 'next_entry_offset', 0)
+        self.next_entry_offset = file_object.tell() - len(self.buffer)
+
+      if not file_verified and self.error >= self.MAX_LINES * 2:
+        logging.debug(
+            u'Lexer error count: {0:d} and current state {1:s}'.format(
+                self.error, self.state))
+        file_object.close()
+        raise errors.UnableToParseFile(
+            u'[{0:s}] unsupported file: {1:s}.'.format(
+                self.NAME, path_spec_printable))
+
+      if self.line_ready:
+        try:
+          event_object = self.ParseLine(parser_context)
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+          file_verified = True
+
+        except errors.TimestampNotCorrectlyFormed as exception:
+          error_count += 1
+          if file_verified:
+            logging.debug(
+                u'[{0:s} VERIFIED] Error count: {1:d} and ERROR: {2:d}'.format(
+                    path_spec_printable, error_count, self.error))
+            logging.warning(
+                u'[{0:s}] Unable to parse timestamp with error: {1:s}'.format(
+                    self.NAME, exception))
+
+          else:
+            logging.debug((
+                u'[{0:s} EVALUATING] Error count: {1:d} and ERROR: '
+                u'{2:d}').format(path_spec_printable, error_count, self.error))
+
+            if error_count >= self.MAX_LINES:
+              file_object.close()
+              raise errors.UnableToParseFile(
+                  u'[{0:s}] unsupported file: {1:s}.'.format(
+                      self.NAME, path_spec_printable))
+
+        finally:
+          self.ClearValues()
+
+      if self.Empty():
+        # Try to fill the buffer to prevent the parser from ending prematurely.
+        self.Feed()
+
+      if self.Empty():
+        break
+
+    if not file_verified:
+      file_object.close()
+      raise errors.UnableToParseFile(
+          u'[{0:s}] unable to parse file: {1:s}.'.format(
+              self.NAME, path_spec_printable))
+
+    file_offset = file_object.get_offset()
+    if file_offset < file_object.get_size():
+      logging.error((
+          u'{0:s} prematurely terminated parsing: {1:s} at offset: '
+          u'0x{2:08x}.').format(
+              self.NAME, path_spec_printable, file_offset))
+    file_object.close()
+
+  def ParseString(self, match=None, **unused_kwargs):
+    """Return a string with combined values from the lexer.
+
+    Args:
+      match: The regular expression match object.
+
+    Returns:
+      A string that combines the values that are so far
+      saved from the lexer.
+    """
+    try:
+      self.attributes['body'] += match.group(1).strip('\n')
+    except IndexError:
+      self.attributes['body'] += match.group(0).strip('\n')
+
+  def PrintLine(self):
+    """Return a string with combined values from the lexer."""
+    # Note: the attributes are stored in a dict, hence dict.get() is used
+    # here instead of getattr().
+    year = self.attributes.get('iyear', None)
+    month = self.attributes.get('imonth', None)
+    day = self.attributes.get('iday', None)
+
+    if None in [year, month, day]:
+      date_string = u'[DATE NOT SET]'
+    else:
+      try:
+        year = int(year)
+        month = int(month)
+        day = int(day)
+
+        date_string = u'{0:04d}-{1:02d}-{2:02d}'.format(year, month, day)
+      except (TypeError, ValueError):
+        date_string = u'[DATE INVALID]'
+
+    time_string = self.attributes.get('time', u'[TIME NOT SET]')
+    hostname_string = self.attributes.get('hostname', u'[HOSTNAME NOT SET]')
+    reporter_string = self.attributes.get('reporter', u'[REPORTER NOT SET]')
+    body_string = self.attributes.get('body', u'[BODY NOT SET]')
+
+    # TODO: this is a work in progress. The reason for the try-catch is that
+    # the text parser is handed a non-text file and must deal with converting
+    # arbitrary binary data.
+    try:
+      line = u'{0:s} {1:s} [{2:s}] {3:s} => {4:s}'.format(
+          date_string, time_string, hostname_string, reporter_string,
+          body_string)
+    except UnicodeError:
+      line = 'Unable to print line - due to encoding error.'
+
+    return line
+
+  def ParseLine(self, parser_context):
+    """Return an event object extracted from the current line.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+
+    Returns:
+      An event object (instance of TextEvent).
+    """
+    if not self.attributes['time']:
+      raise errors.TimestampNotCorrectlyFormed(
+          u'Unable to parse timestamp, time not set.')
+
+    if not self.attributes['iyear']:
+      raise errors.TimestampNotCorrectlyFormed(
+          u'Unable to parse timestamp, year not set.')
+
+    times = self.attributes['time'].split(':')
+    if self.local_zone:
+      timezone = parser_context.timezone
+    else:
+      timezone = pytz.UTC
+
+    if len(times) < 3:
+      raise errors.TimestampNotCorrectlyFormed((
+          u'Unable to parse timestamp, not of the format HH:MM:SS '
+          u'[{0:s}]').format(self.PrintLine()))
+    try:
+      secs = times[2].split('.')
+      if len(secs) == 2:
+        sec, us = secs
+      else:
+        sec = times[2]
+        us = 0
+
+      timestamp = timelib.Timestamp.FromTimeParts(
+          int(self.attributes['iyear']), self.attributes['imonth'],
+          self.attributes['iday'], int(times[0]), int(times[1]),
+          int(sec), microseconds=int(us), timezone=timezone)
+
+    except ValueError as exception:
+      raise errors.TimestampNotCorrectlyFormed(
+          u'Unable to parse: {0:s} with error: {1:s}'.format(
+              self.PrintLine(), exception))
+
+    return self.CreateEvent(
+        timestamp, getattr(self, 'entry_offset', 0), self.attributes)
+
+  # TODO: this is a rough initial implementation to get this working.
+  def CreateEvent(self, timestamp, offset, attributes):
+    """Creates an event.
+
+    This function should be overwritten by text parsers that need to
+    generate a specific event object type; the default is TextEvent.
+
+    Args:
+      timestamp: The timestamp time value. The timestamp contains the
+                 number of microseconds since Jan 1, 1970 00:00:00 UTC.
+      offset: The offset of the event.
+      attributes: A dict that contains the events attributes.
+
+    Returns:
+      An event object (instance of TextEvent).
+    """
+    return text_events.TextEvent(timestamp, offset, attributes)
+
+
+class TextCSVParser(interface.BaseParser):
+  """A parser for simple CSV line-per-entry log files."""
+
+  # A list that contains the names of all the fields in the log file.
+  COLUMNS = []
+
+  # A CSV file is comma separated, but this can be overwritten to include
+  # tab, pipe or other character separation.
+  VALUE_SEPARATOR = ','
+
+  # If there is a header before the lines start it can be defined here, and
+  # the number of header lines that need to be skipped before the parsing
+  # starts.
+  NUMBER_OF_HEADER_LINES = 0
+
+  # If there is a special quote character used inside the structured text
+  # it can be defined here.
+  QUOTE_CHAR = '"'
+
+  # A value that should not appear inside the file, used to test whether the
+  # actual file conforms to the expected format.
+  MAGIC_TEST_STRING = 'RegnThvotturMeistarans'
+
+  def VerifyRow(self, unused_parser_context, unused_row):
+    """Return a bool indicating whether or not this is the correct parser.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      row: A single row from the CSV file.
+
+    Returns:
+      True if this is the correct parser, False otherwise.
+ """ + pass + + def ParseRow( + self, parser_context, row_offset, row, file_entry=None, + parser_chain=None): + """Parse a line of the log file and extract event objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + row_offset: The offset of the row. + row: A dictionary containing all the fields as denoted in the + COLUMNS class list. + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + event_object = event.EventObject() + if row_offset is not None: + event_object.offset = row_offset + event_object.row_dict = row + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a CVS file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + path_spec_printable = file_entry.path_spec.comparable.replace(u'\n', u';') + file_object = file_entry.GetFileObject() + file_object.seek(0, os.SEEK_SET) + + text_file_object = text_file.TextFile(file_object) + + # If we specifically define a number of lines we should skip do that here. + for _ in range(0, self.NUMBER_OF_HEADER_LINES): + _ = text_file_object.readline() + + reader = csv.DictReader( + text_file_object, fieldnames=self.COLUMNS, + restkey=self.MAGIC_TEST_STRING, restval=self.MAGIC_TEST_STRING, + delimiter=self.VALUE_SEPARATOR, quotechar=self.QUOTE_CHAR) + + try: + row = reader.next() + except (csv.Error, StopIteration): + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] Unable to parse CSV file: {1:s}.'.format( + self.NAME, path_spec_printable)) + + number_of_columns = len(self.COLUMNS) + number_of_records = len(row) + + if number_of_records != number_of_columns: + file_object.close() + raise errors.UnableToParseFile(( + u'[{0:s}] Unable to parse CSV file: {1:s}. Wrong number of ' + u'records (expected: {2:d}, got: {3:d})').format( + self.NAME, path_spec_printable, number_of_columns, + number_of_records)) + + for key, value in row.items(): + if key == self.MAGIC_TEST_STRING or value == self.MAGIC_TEST_STRING: + file_object.close() + raise errors.UnableToParseFile(( + u'[{0:s}] Unable to parse CSV file: {1:s}. Signature ' + u'mismatch.').format(self.NAME, path_spec_printable)) + + if not self.VerifyRow(parser_context, row): + file_object.close() + raise errors.UnableToParseFile(( + u'[{0:s}] Unable to parse CSV file: {1:s}. Verification ' + u'failed.').format(self.NAME, path_spec_printable)) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + self.ParseRow( + parser_context, text_file_object.tell(), row, file_entry=file_entry, + parser_chain=parser_chain) + + for row in reader: + self.ParseRow( + parser_context, text_file_object.tell(), row, file_entry=file_entry, + parser_chain=parser_chain) + + file_object.close() + + +def PyParseRangeCheck(lower_bound, upper_bound): + """Verify that a number is within a defined range. + + This is a callback method for pyparsing setParseAction + that verifies that a read number is within a certain range. 
+
+ To use this method it needs to be defined as a callback method
+ in setParseAction with the upper and lower bound set as parameters.
+
+ Args:
+ lower_bound: An integer representing the lower bound of the range.
+ upper_bound: An integer representing the upper bound of the range.
+
+ Returns:
+ A callback method that can be used by pyparsing setParseAction.
+ """
+ def CheckRange(unused_string, unused_location, tokens):
+ """Parse the arguments."""
+ try:
+ check_number = tokens[0]
+ except IndexError:
+ check_number = -1
+
+ if check_number < lower_bound:
+ raise pyparsing.ParseException(
+ u'Value: {0:d} precedes lower bound: {1:d}'.format(
+ check_number, lower_bound))
+
+ if check_number > upper_bound:
+ raise pyparsing.ParseException(
+ u'Value: {0:d} exceeds upper bound: {1:d}'.format(
+ check_number, upper_bound))
+
+ # Since callback methods for pyparsing need to accept certain parameters
+ # and there is no way to define conditions, like upper and lower bounds,
+ # we need to return a method here that accepts those pyparsing parameters.
+ return CheckRange
+
+
+def PyParseIntCast(unused_string, unused_location, tokens):
+ """Return an integer from a string.
+
+ This is a pyparsing callback method that converts the matched
+ string into an integer.
+
+ The method modifies the content of the tokens list and converts
+ them all to an integer value.
+
+ Args:
+ unused_string: The original parsed string.
+ unused_location: The location within the string where the match was made.
+ tokens: A list of extracted tokens (where the string to be converted is
+ stored).
+ """
+ # Cast the regular tokens.
+ for index, token in enumerate(tokens):
+ try:
+ tokens[index] = int(token)
+ except ValueError:
+ logging.error(u'Unable to cast [{0:s}] to an int, setting to 0'.format(
+ token))
+ tokens[index] = 0
+
+ # We also need to cast the dictionary-built tokens.
+ for key in tokens.keys():
+ try:
+ tokens[key] = int(tokens[key], 10)
+ except ValueError:
+ logging.error(
+ u'Unable to cast [{0:s} = {1:s}] to an int, setting to 0'.format(
+ key, tokens[key]))
+ tokens[key] = 0
+
+
+def PyParseJoinList(unused_string, unused_location, tokens):
+ """Return a joined token from a list of tokens.
+
+ This is a callback method for pyparsing setParseAction that modifies
+ the returned token list to join all the elements in the list to a single
+ token.
+
+ Args:
+ unused_string: The original parsed string.
+ unused_location: The location within the string where the match was made.
+ tokens: A list of extracted tokens. This is the list that should be joined
+ together and stored as a single token.
+ """
+ join_list = []
+ for token in tokens:
+ try:
+ join_list.append(str(token))
+ except UnicodeDecodeError:
+ join_list.append(repr(token))
+
+ tokens[0] = u''.join(join_list)
+ del tokens[1:]
+
+
+class PyparsingConstants(object):
+ """A class that maintains constants for pyparsing."""
+
+ # Numbers.
+ INTEGER = pyparsing.Word(pyparsing.nums).setParseAction(PyParseIntCast)
+ IPV4_OCTET = pyparsing.Word(pyparsing.nums, min=1, max=3).setParseAction(
+ PyParseIntCast, PyParseRangeCheck(0, 255))
+ IPV4_ADDRESS = (IPV4_OCTET + ('.' + IPV4_OCTET) * 3).setParseAction(
+ PyParseJoinList)
+
+ # TODO: Fix the IPv6 address specification to be more accurate (8 :, correct
+ # size, etc).
+ IPV6_ADDRESS = pyparsing.Word(':' + pyparsing.hexnums).setParseAction(
+ PyParseJoinList)
+
+ # Common words.
+ MONTH = pyparsing.Word(
+ pyparsing.string.uppercase, pyparsing.string.lowercase,
+ exact=3)
+
+ # Define date structures.
+ HYPHEN = pyparsing.Literal('-').suppress()
+ YEAR = pyparsing.Word(pyparsing.nums, exact=4).setParseAction(
+ PyParseIntCast)
+ TWO_DIGITS = pyparsing.Word(pyparsing.nums, exact=2).setParseAction(
+ PyParseIntCast)
+ ONE_OR_TWO_DIGITS = pyparsing.Word(
+ pyparsing.nums, min=1, max=2).setParseAction(PyParseIntCast)
+ DATE = pyparsing.Group(
+ YEAR + pyparsing.Suppress('-') + TWO_DIGITS +
+ pyparsing.Suppress('-') + TWO_DIGITS)
+ DATE_REV = pyparsing.Group(
+ TWO_DIGITS + pyparsing.Suppress('-') + TWO_DIGITS +
+ pyparsing.Suppress('-') + YEAR)
+ TIME = pyparsing.Group(
+ TWO_DIGITS + pyparsing.Suppress(':') + TWO_DIGITS +
+ pyparsing.Suppress(':') + TWO_DIGITS)
+ TIME_MSEC = TIME + pyparsing.Suppress('.') + INTEGER
+ DATE_TIME = DATE + TIME
+ DATE_TIME_MSEC = DATE + TIME_MSEC
+
+ COMMENT_LINE_HASH = pyparsing.Literal('#') + pyparsing.SkipTo(
+ pyparsing.LineEnd())
+ # TODO: Add more commonly used structs that can be used by parsers.
+ PID = pyparsing.Word(
+ pyparsing.nums, min=1, max=5).setParseAction(PyParseIntCast)
+
+
+class PyparsingSingleLineTextParser(interface.BaseParser):
+ """Single line text parser based on the pyparsing library."""
+
+ # The actual structure; this needs to be defined by each parser.
+ # This is defined as a list of tuples so that more than a single line
+ # structure can be defined. That way the parser can support more than a
+ # single type of log entry, despite them all having in common the constraint
+ # that each log entry is a single line.
+ # Each tuple should have two entries, a key and a structure. This is done to
+ # keep the structures in an order of priority/preference.
+ # The key is a comment or an identification that is passed to the ParseRecord
+ # function so that the developer can identify which structure got parsed.
+ # The value is the actual pyparsing structure.
+ LINE_STRUCTURES = []
+
+ # So that the tool does not read too much data into a buffer when evaluating
+ # whether or not the parser is the right one for this file, we specifically
+ # define a maximum amount of bytes a single line can occupy. This
+ # constant can be overwritten by implementations if their format might have a
+ # longer line than 400 bytes.
+ MAX_LINE_LENGTH = 400
+
+ # Define an encoding. If a file is encoded using a specific encoding it is
+ # advised to include it here. If this class constant is set all lines will be
+ # decoded prior to being sent to parsing by pyparsing; if it is not properly
+ # set it could negatively affect parsing of the file.
+ # If this value needs to be calculated on the fly (not a fixed constant for
+ # this particular file type) it can be done by modifying the self.encoding
+ # attribute.
+ ENCODING = ''
+
+ def __init__(self):
+ """Initializes the pyparsing single-line text parser object."""
+ super(PyparsingSingleLineTextParser, self).__init__()
+ self.encoding = self.ENCODING
+ self._current_offset = 0
+ # TODO: self._line_structures is a work-around and this needs
+ # a structural fix.
+ self._line_structures = self.LINE_STRUCTURES
+
+ def _ReadLine(
+ self, parser_context, file_entry, text_file_object, max_len=0,
+ quiet=False, depth=0):
+ """Read a single line from a text file and return it back.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_entry: A file entry object (instance of dfvfs.FileEntry).
+ text_file_object: A text file object (instance of dfvfs.TextFile).
+ max_len: If defined determines the maximum number of bytes a single line
+ can take.
+ quiet: If True then a decode warning is not displayed. + depth: A threshold of how many newlines we can encounter before bailing + out. + + Returns: + A single line read from the file-like object, or the maximum number of + characters (if max_len defined and line longer than the defined size). + """ + if max_len: + line = text_file_object.readline(max_len) + else: + line = text_file_object.readline() + + if not line: + return + + # If line is empty, skip it and go on. + if line == '\n' or line == '\r\n': + # Max 40 new lines in a row before we bail out. + if depth == 40: + return '' + + return self._ReadLine( + parser_context, file_entry, text_file_object, max_len=max_len, + depth=depth + 1) + + if not self.encoding: + return line.strip() + + try: + decoded_line = line.decode(self.encoding) + return decoded_line.strip() + except UnicodeDecodeError: + if not quiet: + logging.warning(( + u'Unable to decode line [{0:s}...] with encoding: {1:s} in ' + u'file: {2:s}').format( + repr(line[1:30]), self.encoding, + parser_context.GetDisplayName(file_entry))) + return line.strip() + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a text file using a pyparsing definition. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. + """ + # TODO: find a more elegant way for this; currently the mac_wifi and + # syslog parser seem to rely on this member. + self.file_entry = file_entry + + file_object = file_entry.GetFileObject() + + # TODO: self._line_structures is a work-around and this needs + # a structural fix. + if not self._line_structures: + raise errors.UnableToParseFile( + u'Line structure undeclared, unable to proceed.') + + file_object.seek(0, os.SEEK_SET) + text_file_object = text_file.TextFile(file_object) + + line = self._ReadLine( + parser_context, file_entry, text_file_object, + max_len=self.MAX_LINE_LENGTH, quiet=True) + if not line: + raise errors.UnableToParseFile(u'Not a text file.') + + if len(line) == self.MAX_LINE_LENGTH or len( + line) == self.MAX_LINE_LENGTH - 1: + logging.debug(( + u'Trying to read a line and reached the maximum allowed length of ' + u'{0:d}. The last few bytes of the line are: {1:s} [parser ' + u'{2:s}]').format( + self.MAX_LINE_LENGTH, repr(line[-10:]), self.NAME)) + + if not utils.IsText(line): + raise errors.UnableToParseFile(u'Not a text file, unable to proceed.') + + if not self.VerifyStructure(parser_context, line): + raise errors.UnableToParseFile('Wrong file structure.') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + # Set the offset to the beginning of the file. + self._current_offset = 0 + # Read every line in the text file. + while line: + parsed_structure = None + use_key = None + # Try to parse the line using all the line structures. 
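+ # Note: the structures are attempted in the order they appear in
+ # LINE_STRUCTURES, so subclasses should list the most specific (or most
+ # frequent) grammar first; the first grammar that parses the line wins.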
+ for key, structure in self.LINE_STRUCTURES:
+ try:
+ parsed_structure = structure.parseString(line)
+ except pyparsing.ParseException:
+ pass
+ if parsed_structure:
+ use_key = key
+ break
+
+ if parsed_structure:
+ parsed_event = self.ParseRecord(
+ parser_context, use_key, parsed_structure)
+ if parsed_event:
+ parsed_event.offset = self._current_offset
+ parser_context.ProduceEvent(
+ parsed_event, parser_chain=parser_chain, file_entry=file_entry)
+ else:
+ logging.warning(u'Unable to parse log line: {0:s}'.format(line))
+
+ self._current_offset = text_file_object.get_offset()
+ line = self._ReadLine(parser_context, file_entry, text_file_object)
+
+ file_object.close()
+
+ @abc.abstractmethod
+ def ParseRecord(self, parser_context, key, structure):
+ """Parse a single extracted pyparsing structure.
+
+ This function takes as an input a parsed pyparsing structure
+ and produces an EventObject if possible from that structure.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ key: An identification string indicating the name of the parsed
+ structure.
+ structure: A pyparsing.ParseResults object from a line in the
+ log file.
+
+ Returns:
+ An event object (instance of EventObject) or None.
+ """
+
+ @abc.abstractmethod
+ def VerifyStructure(self, parser_context, line):
+ """Verify the structure of the file and return boolean based on that check.
+
+ This function should read enough text from the text file to confirm
+ that the file is the correct one for this particular parser.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ line: A single line from the text file.
+
+ Returns:
+ True if this is the correct parser, False otherwise.
+ """
+
+
+class EncodedTextReader(object):
+ """Class to read simple encoded text."""
+
+ def __init__(self, buffer_size=2048, encoding=None):
+ """Initializes the encoded text reader object.
+
+ Args:
+ buffer_size: optional buffer size. The default is 2048.
+ encoding: optional encoding. The default is None.
+ """
+ super(EncodedTextReader, self).__init__()
+ self._buffer = ''
+ self._buffer_size = buffer_size
+ self._current_offset = 0
+ self._encoding = encoding
+
+ if self._encoding:
+ self._new_line = u'\n'.encode(self._encoding)
+ self._carriage_return = u'\r'.encode(self._encoding)
+ else:
+ self._new_line = '\n'
+ self._carriage_return = '\r'
+
+ self._new_line_length = len(self._new_line)
+ self._carriage_return_length = len(self._carriage_return)
+
+ self.lines = u''
+
+ def _ReadLine(self, file_object):
+ """Reads a line from the file object.
+
+ Args:
+ file_object: the file-like object.
+
+ Returns:
+ A string containing the line.
+ """
+ if len(self._buffer) < self._buffer_size:
+ self._buffer = ''.join([
+ self._buffer, file_object.read(self._buffer_size)])
+
+ line, new_line, self._buffer = self._buffer.partition(self._new_line)
+ if not line and not new_line:
+ line = self._buffer
+ self._buffer = ''
+
+ self._current_offset += len(line)
+
+ # Strip carriage returns from the text.
+ if line.endswith(self._carriage_return):
+ line = line[:-self._carriage_return_length]
+
+ if new_line:
+ line = ''.join([line, self._new_line])
+ self._current_offset += self._new_line_length
+
+ # If a parser specifically indicates a specific encoding we need
+ # to handle the buffer as it is an encoded string.
+ # If decoding fails we fall back to the original raw string.
+ if self._encoding: + try: + line = line.decode(self._encoding) + except UnicodeDecodeError: + # TODO: it might be better to raise here. + pass + + return line + + def ReadLine(self, file_object): + """Reads a line. + + Args: + file_object: the file-like object. + + Returns: + A single line read from the lines buffer. + """ + line, _, self.lines = self.lines.partition('\n') + if not line: + self.ReadLines(file_object) + line, _, self.lines = self.lines.partition('\n') + + return line + + def ReadLines(self, file_object): + """Reads lines into the lines buffer. + + Args: + file_object: the file-like object. + """ + lines_size = len(self.lines) + if lines_size < self._buffer_size: + lines_size = self._buffer_size - lines_size + while lines_size > 0: + line = self._ReadLine(file_object) + if not line: + break + + self.lines = u''.join([self.lines, line]) + lines_size -= len(line) + + def Reset(self): + """Resets the encoded text reader.""" + self._buffer = '' + self._current_offset = 0 + + self.lines = u'' + + def SkipAhead(self, file_object, number_of_characters): + """Skips ahead a number of characters. + + Args: + file_object: the file-like object. + number_of_characters: the number of characters. + """ + lines_size = len(self.lines) + while number_of_characters >= lines_size: + number_of_characters -= lines_size + + self.lines = u'' + self.ReadLines(file_object) + lines_size = len(self.lines) + if lines_size == 0: + return + + self.lines = self.lines[number_of_characters:] + + +class PyparsingMultiLineTextParser(PyparsingSingleLineTextParser): + """Multi line text parser based on the pyparsing library.""" + + BUFFER_SIZE = 2048 + + def __init__(self): + """Initializes the pyparsing multi-line text parser object.""" + super(PyparsingMultiLineTextParser, self).__init__() + self._buffer_size = self.BUFFER_SIZE + self._text_reader = EncodedTextReader( + buffer_size=self.BUFFER_SIZE, encoding=self.ENCODING) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Parse a text file using a pyparsing definition. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: if the line structures are missing. + """ + if not self.LINE_STRUCTURES: + raise errors.UnableToParseFile(u'Missing line structures.') + + self._text_reader.Reset() + + file_object = file_entry.GetFileObject() + file_object.seek(0, os.SEEK_SET) + + try: + self._text_reader.ReadLines(file_object) + except UnicodeDecodeError as exception: + raise errors.UnableToParseFile( + u'Not a text file, with error: {0:s}'.format(exception)) + + if not utils.IsText(self._text_reader.lines): + raise errors.UnableToParseFile(u'Not a text file, unable to proceed.') + + if not self.VerifyStructure(parser_context, self._text_reader.lines): + raise errors.UnableToParseFile(u'Wrong file structure.') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + # Read every line in the text file. + while self._text_reader.lines: + # Initialize pyparsing objects. + tokens = None + start = 0 + end = 0 + + key = None + + # Try to parse the line using all the line structures. 
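+ # Note: unlike the single line parser above, scanString() is used here;
+ # it yields (tokens, start, end) tuples and can match anywhere in the
+ # buffer, which is why a match is only used when it starts at offset 0.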
+ for key, structure in self.LINE_STRUCTURES: + try: + parsed_structure = next( + structure.scanString(self._text_reader.lines, maxMatches=1), None) + except pyparsing.ParseException: + continue + + if not parsed_structure: + continue + + tokens, start, end = parsed_structure + + # Only want to parse the structure if it starts + # at the beginning of the buffer. + if start == 0: + break + + if tokens and start == 0: + parsed_event = self.ParseRecord(parser_context, key, tokens) + if parsed_event: + # TODO: need a reliable way to handle this. + # parsed_event.offset = self._text_reader.line_offset + parser_context.ProduceEvent( + parsed_event, parser_chain=parser_chain, file_entry=file_entry) + + self._text_reader.SkipAhead(file_object, end) + + else: + odd_line = self._text_reader.ReadLine(file_object) + if odd_line: + logging.warning( + u'Unable to parse log line: {0:s}'.format(repr(odd_line))) + + try: + self._text_reader.ReadLines(file_object) + except UnicodeDecodeError as exception: + logging.error( + u'[{0:s}] Unable to read lines from file: {1:s} with error: ' + u'{2:s}'.format( + parser_chain, + file_entry.path_spec.comparable.replace(u'\n', u';'), + exception)) diff --git a/plaso/parsers/text_parser_test.py b/plaso/parsers/text_parser_test.py new file mode 100644 index 0000000..96c2b11 --- /dev/null +++ b/plaso/parsers/text_parser_test.py @@ -0,0 +1,181 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the tests for the generic text parser.""" + +import unittest + +import pyparsing + +from plaso.events import text_events +from plaso.formatters import interface as formatters_interface +from plaso.formatters import manager as formatters_manager +from plaso.lib import errors +from plaso.lib import lexer +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import text_parser + + +class TestTextEvent(text_events.TextEvent): + """Test text event.""" + DATA_TYPE = 'test:parser:text' + + +class TestTextEventFormatter(formatters_interface.EventFormatter): + """Test text event formatter.""" + DATA_TYPE = 'test:parser:text' + FORMAT_STRING = u'{body}' + + SOURCE_LONG = 'Test Text Parser' + + +class TestTextParser(text_parser.SlowLexicalTextParser): + """Implement a text parser object that can successfully parse a text file. + + To be able to achieve that one function has to be implemented, the ParseDate + one. 
+ """ + NAME = 'test_text' + + tokens = [ + lexer.Token('INITIAL', + r'^([\d\/]+) ', 'SetDate', 'TIME'), + lexer.Token('TIME', r'([0-9:\.]+) ', 'SetTime', 'STRING_HOST'), + lexer.Token('STRING_HOST', r'([^\-]+)- ', 'ParseStringHost', 'STRING'), + lexer.Token('STRING', '([^\n]+)', 'ParseString', ''), + lexer.Token('STRING', '\n', 'ParseMessage', 'INITIAL')] + + def ParseStringHost(self, match, **_): + user, host = match.group(1).split(':') + self.attributes['hostname'] = host + self.attributes['username'] = user + + def SetDate(self, match, **_): + month, day, year = match.group(1).split('/') + self.attributes['imonth'] = int(month) + self.attributes['iyear'] = int(year) + self.attributes['iday'] = int(day) + + def Scan(self, unused_file_entry): + pass + + def CreateEvent(self, timestamp, offset, attributes): + event_object = TestTextEvent(timestamp, 0, attributes) + event_object.offset = offset + return event_object + + +class TextParserTest(test_lib.ParserTestCase): + """An unit test for the plaso parser library.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = TestTextParser() + + def testTextParserFail(self): + """Test a text parser that will not match against content.""" + test_file = self._GetTestFilePath(['text_parser', 'test1.txt']) + + with self.assertRaises(errors.UnableToParseFile): + _ = self._ParseFile(self._parser, test_file) + + def testTextParserSuccess(self): + """Test a text parser that will match against content.""" + test_file = self._GetTestFilePath(['text_parser', 'test2.txt']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + event_object = event_objects[0] + + msg1, _ = formatters_manager.EventFormatterManager.GetMessageStrings( + event_object) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-01-01 05:23:15') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(msg1, 'first line.') + self.assertEquals(event_object.hostname, 'myhost') + self.assertEquals(event_object.username, 'myuser') + + event_object = event_objects[1] + + msg2, _ = formatters_manager.EventFormatterManager.GetMessageStrings( + event_object) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '1991-12-24 19:58:06') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(msg2, 'second line.') + self.assertEquals(event_object.hostname, 'myhost') + self.assertEquals(event_object.username, 'myuser') + + +class PyParserTest(test_lib.ParserTestCase): + """Few unit tests for the pyparsing unit.""" + + def _CheckIPv4(self, ip_address): + # TODO: Add a similar IPv6 check. 
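+ # parseString() raises pyparsing.ParseException when the input does not
+ # match the IPV4_ADDRESS grammar; translate that into a boolean here.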
+ try: + text_parser.PyparsingConstants.IPV4_ADDRESS.parseString(ip_address) + return True + except pyparsing.ParseException: + return False + + def testPyConstantIPv4(self): + """Run few tests to make sure the constants are working.""" + self.assertTrue(self._CheckIPv4('123.51.234.52')) + self.assertTrue(self._CheckIPv4('255.254.23.1')) + self.assertTrue(self._CheckIPv4('1.1.34.2')) + self.assertFalse(self._CheckIPv4('1.1.34.258')) + self.assertFalse(self._CheckIPv4('a.1.34.258')) + self.assertFalse(self._CheckIPv4('.34.258')) + self.assertFalse(self._CheckIPv4('34.258')) + self.assertFalse(self._CheckIPv4('10.52.34.258')) + + def testPyConstantOctet(self): + with self.assertRaises(pyparsing.ParseException): + text_parser.PyparsingConstants.IPV4_OCTET.parseString('526') + + with self.assertRaises(pyparsing.ParseException): + text_parser.PyparsingConstants.IPV4_OCTET.parseString('1026') + + with self.assertRaises(pyparsing.ParseException): + text_parser.PyparsingConstants.IPV4_OCTET.parseString( + 'a9', parseAll=True) + + def testPyConstantOthers(self): + with self.assertRaises(pyparsing.ParseException): + text_parser.PyparsingConstants.MONTH.parseString('MMo') + with self.assertRaises(pyparsing.ParseException): + text_parser.PyparsingConstants.MONTH.parseString('M') + with self.assertRaises(pyparsing.ParseException): + text_parser.PyparsingConstants.MONTH.parseString('March', parseAll=True) + + self.assertTrue(text_parser.PyparsingConstants.MONTH.parseString('Jan')) + + line = '# This is a comment.' + parsed_line = text_parser.PyparsingConstants.COMMENT_LINE_HASH.parseString( + line) + self.assertEquals(parsed_line[-1], 'This is a comment.') + self.assertEquals(len(parsed_line), 2) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/utmp.py b/plaso/parsers/utmp.py new file mode 100644 index 0000000..0515b1e --- /dev/null +++ b/plaso/parsers/utmp.py @@ -0,0 +1,273 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Linux UTMP files.""" + +import construct +import logging +import os +import socket + +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class UtmpEvent(event.EventObject): + """Convenience class for an UTMP event.""" + + DATA_TYPE = 'linux:utmp:event' + + def __init__( + self, timestamp, microsecond, user, computer_name, + terminal, status, ip_address, structure): + """Initializes the event object. + + Args: + timestamp: Epoch when the terminal was started. + microsecond: number of microseconds related with timestamp. + user: active user name. + computer_name: name of the computer. + terminal: type of terminal. + status: login status. 
+ ip_address: IP address from which the connection was made.
+ structure: the parsed entry structure, from which the exit status,
+ the process ID and the Inittab terminal ID are also taken.
+ """
+ super(UtmpEvent, self).__init__()
+ self.timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
+ timestamp, microsecond)
+ self.timestamp_desc = eventdata.EventTimestamp.START_TIME
+ self.user = user
+ self.computer_name = computer_name
+ self.terminal = terminal
+ self.status = status
+ self.ip_address = ip_address
+ self.exit = structure.exit
+ self.pid = structure.pid
+ self.terminal_id = structure.terminal_id
+
+
+class UtmpParser(interface.BaseParser):
+ """Parser for Linux/Unix UTMP files."""
+
+ NAME = 'utmp'
+ DESCRIPTION = u'Parser for Linux/Unix UTMP files.'
+
+ LINUX_UTMP_ENTRY = construct.Struct(
+ 'utmp_linux',
+ construct.ULInt32('type'),
+ construct.ULInt32('pid'),
+ construct.String('terminal', 32),
+ construct.ULInt32('terminal_id'),
+ construct.String('username', 32),
+ construct.String('hostname', 256),
+ construct.ULInt16('termination'),
+ construct.ULInt16('exit'),
+ construct.ULInt32('session'),
+ construct.ULInt32('timestamp'),
+ construct.ULInt32('microsecond'),
+ construct.ULInt32('address_a'),
+ construct.ULInt32('address_b'),
+ construct.ULInt32('address_c'),
+ construct.ULInt32('address_d'),
+ construct.Padding(20))
+
+ LINUX_UTMP_ENTRY_SIZE = LINUX_UTMP_ENTRY.sizeof()
+
+ STATUS_TYPE = {
+ 0: 'EMPTY',
+ 1: 'RUN_LVL',
+ 2: 'BOOT_TIME',
+ 3: 'NEW_TIME',
+ 4: 'OLD_TIME',
+ 5: 'INIT_PROCESS',
+ 6: 'LOGIN_PROCESS',
+ 7: 'USER_PROCESS',
+ 8: 'DEAD_PROCESS',
+ 9: 'ACCOUNTING'}
+
+ # Set a default test value for a few fields; this is supposed to be text
+ # that is highly unlikely to be seen in a terminal field or a username
+ # field. It is important that this value does not show up in such fields,
+ # even though they are otherwise free flowing text fields.
+ _DEFAULT_TEST_VALUE = u'Ekki Fraedilegur Moguleiki, thetta er bull ! = + _<>'
+
+ def Parse(self, parser_context, file_entry, parser_chain=None):
+ """Extract data from an UTMP file.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ file_entry: A file entry object (instance of dfvfs.FileEntry).
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ """
+ file_object = file_entry.GetFileObject()
+ try:
+ structure = self.LINUX_UTMP_ENTRY.parse_stream(file_object)
+ except (IOError, construct.FieldError) as exception:
+ file_object.close()
+ raise errors.UnableToParseFile(
+ u'Unable to parse UTMP Header with error: {0:s}'.format(exception))
+
+ if structure.type not in self.STATUS_TYPE:
+ file_object.close()
+ raise errors.UnableToParseFile((
+ u'Not an UTMP file, unknown type '
+ u'[{0:d}].').format(structure.type))
+
+ if not self._VerifyTextField(structure.terminal):
+ file_object.close()
+ raise errors.UnableToParseFile(
+ u'Not an UTMP file, unknown terminal.')
+
+ if not self._VerifyTextField(structure.username):
+ file_object.close()
+ raise errors.UnableToParseFile(
+ u'Not an UTMP file, unknown username.')
+
+ if not self._VerifyTextField(structure.hostname):
+ file_object.close()
+ raise errors.UnableToParseFile(
+ u'Not an UTMP file, unknown hostname.')
+
+ # Check a few values.
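+ # The default test value above acts as a sentinel: it is only returned
+ # by _GetTextFromNullTerminatedString() unchanged, which in the checks
+ # below means the corresponding field was empty or could not be decoded.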
+ terminal = self._GetTextFromNullTerminatedString(
+ structure.terminal, self._DEFAULT_TEST_VALUE)
+ if terminal == self._DEFAULT_TEST_VALUE:
+ raise errors.UnableToParseFile(
+ u'Not an UTMP file, no terminal set.')
+
+ username = self._GetTextFromNullTerminatedString(
+ structure.username, self._DEFAULT_TEST_VALUE)
+
+ if username == self._DEFAULT_TEST_VALUE:
+ raise errors.UnableToParseFile(
+ u'Not an UTMP file, no username set.')
+
+ if not structure.timestamp:
+ raise errors.UnableToParseFile(
+ u'Not an UTMP file, no timestamp set in the first record.')
+
+ # Add ourselves to the parser chain, which will be used in all subsequent
+ # event creation in this parser.
+ parser_chain = self._BuildParserChain(parser_chain)
+
+ file_object.seek(0, os.SEEK_SET)
+ event_object = self._ReadUtmpEvent(file_object)
+ while event_object:
+ event_object.offset = file_object.tell()
+ parser_context.ProduceEvent(
+ event_object, file_entry=file_entry, parser_chain=parser_chain)
+
+ event_object = self._ReadUtmpEvent(file_object)
+
+ file_object.close()
+
+ def _VerifyTextField(self, text):
+ """Check if a bytestream is a null terminated string.
+
+ Args:
+ text: text field from the structure.
+
+ Returns:
+ True if it is a null terminated string, False otherwise.
+ """
+ _, _, null_chars = text.partition('\x00')
+ if not null_chars:
+ return False
+ return len(null_chars) == null_chars.count('\x00')
+
+ def _ReadUtmpEvent(self, file_object):
+ """Returns an UtmpEvent from a single UTMP entry.
+
+ Args:
+ file_object: a file-like object that points to an UTMP file.
+
+ Returns:
+ An event object constructed from a single UTMP record or None if we
+ have reached the end of the file (EOF).
+ """
+ offset = file_object.tell()
+ data = file_object.read(self.LINUX_UTMP_ENTRY_SIZE)
+ if not data or len(data) != self.LINUX_UTMP_ENTRY_SIZE:
+ return
+ try:
+ entry = self.LINUX_UTMP_ENTRY.parse(data)
+ except (IOError, construct.FieldError):
+ logging.warning((
+ u'UTMP entry at 0x{0:x} couldn\'t be parsed.').format(offset))
+ return self._ReadUtmpEvent(file_object)
+
+ user = self._GetTextFromNullTerminatedString(entry.username)
+ terminal = self._GetTextFromNullTerminatedString(entry.terminal)
+ if terminal == '~':
+ terminal = u'system boot'
+ computer_name = self._GetTextFromNullTerminatedString(entry.hostname)
+ if computer_name == u'N/A' or computer_name == u':0':
+ computer_name = u'localhost'
+ status = self.STATUS_TYPE.get(entry.type, u'N/A')
+
+ if not entry.address_b:
+ try:
+ ip_address = socket.inet_ntoa(
+ construct.ULInt32('int').build(entry.address_a))
+ if ip_address == '0.0.0.0':
+ ip_address = u'localhost'
+ except (IOError, construct.FieldError, socket.error):
+ ip_address = u'N/A'
+ else:
+ ip_address = u'{0:d}.{1:d}.{2:d}.{3:d}'.format(
+ entry.address_a, entry.address_b, entry.address_c, entry.address_d)
+
+ return UtmpEvent(
+ entry.timestamp, entry.microsecond, user, computer_name, terminal,
+ status, ip_address, entry)
+
+ def _GetTextFromNullTerminatedString(
+ self, null_terminated_string, default_string=u'N/A'):
+ """Get a UTF-8 text from a raw null terminated string.
+
+ Args:
+ null_terminated_string: Raw string terminated with null character.
+ default_string: The default string returned if the parser fails.
+
+ Returns:
+ A decoded UTF-8 string, or the supplied default string if the value
+ is empty or cannot be decoded.
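+
+ Example (illustrative): an input of 'root' followed by NUL padding
+ returns u'root', while an empty or undecodable input returns the
+ supplied default_string.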
+ """ + text, _, _ = null_terminated_string.partition('\x00') + try: + text = text.decode('utf-8') + except UnicodeDecodeError: + logging.warning( + u'[UTMP] Decode UTF8 failed, the message string may be cut short.') + text = text.decode('utf-8', 'ignore') + if not text: + return default_string + return text + + +manager.ParsersManager.RegisterParser(UtmpParser) diff --git a/plaso/parsers/utmp_test.py b/plaso/parsers/utmp_test.py new file mode 100644 index 0000000..49985bb --- /dev/null +++ b/plaso/parsers/utmp_test.py @@ -0,0 +1,136 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser test for utmp files.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import utmp as utmp_formatter +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import utmp + + +class UtmpParserTest(test_lib.ParserTestCase): + """The unit test for UTMP parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = utmp.UtmpParser() + + def testParseUtmpFile(self): + """Tests the Parse function for an UTMP file.""" + test_file = self._GetTestFilePath(['utmp']) + events = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(events) + self.assertEqual(len(event_objects), 14) + event_object = event_objects[0] + self.assertEqual(event_object.terminal, u'system boot') + self.assertEqual(event_object.status, u'BOOT_TIME') + event_object = event_objects[1] + self.assertEqual(event_object.status, u'RUN_LVL') + + event_object = event_objects[2] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-12-13 14:45:09') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.user, u'LOGIN') + self.assertEqual(event_object.computer_name, u'localhost') + self.assertEqual(event_object.terminal, u'tty4') + self.assertEqual(event_object.status, u'LOGIN_PROCESS') + self.assertEqual(event_object.exit, 0) + self.assertEqual(event_object.pid, 1115) + self.assertEqual(event_object.terminal_id, 52) + expected_msg = ( + u'User: LOGIN ' + u'Computer Name: localhost ' + u'Terminal: tty4 ' + u'PID: 1115 ' + u'Terminal_ID: 52 ' + u'Status: LOGIN_PROCESS ' + u'IP Address: localhost ' + u'Exit: 0') + expected_msg_short = ( + u'User: LOGIN') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[12] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-12-18 22:46:56.305504') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.user, u'moxilo') + self.assertEqual(event_object.computer_name, u'localhost') + self.assertEqual(event_object.terminal, u'pts/4') + self.assertEqual(event_object.status, u'USER_PROCESS') + self.assertEqual(event_object.exit, 0) + 
self.assertEqual(event_object.pid, 2684) + self.assertEqual(event_object.terminal_id, 13359) + expected_msg = ( + u'User: moxilo ' + u'Computer Name: localhost ' + u'Terminal: pts/4 ' + u'PID: 2684 ' + u'Terminal_ID: 13359 ' + u'Status: USER_PROCESS ' + u'IP Address: localhost ' + u'Exit: 0') + expected_msg_short = ( + u'User: moxilo') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testParseWtmpFile(self): + """Tests the Parse function for an WTMP file.""" + test_file = self._GetTestFilePath(['wtmp.1']) + events = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(events) + self.assertEqual(len(event_objects), 4) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-12-01 17:36:38.432935') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.user, u'userA') + self.assertEqual(event_object.computer_name, u'10.10.122.1') + self.assertEqual(event_object.terminal, u'pts/32') + self.assertEqual(event_object.status, u'USER_PROCESS') + self.assertEqual(event_object.ip_address, u'10.10.122.1') + self.assertEqual(event_object.exit, 0) + self.assertEqual(event_object.pid, 20060) + self.assertEqual(event_object.terminal_id, 842084211) + expected_msg = ( + u'User: userA ' + u'Computer Name: 10.10.122.1 ' + u'Terminal: pts/32 ' + u'PID: 20060 ' + u'Terminal_ID: 842084211 ' + u'Status: USER_PROCESS ' + u'IP Address: 10.10.122.1 ' + u'Exit: 0') + expected_msg_short = ( + u'User: userA') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/utmpx.py b/plaso/parsers/utmpx.py new file mode 100644 index 0000000..e48cd1c --- /dev/null +++ b/plaso/parsers/utmpx.py @@ -0,0 +1,207 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for utmpx files.""" + +# TODO: Add support for other implementations than Mac OS X. +# The parser should be checked against IOS UTMPX file. + +import construct +import logging + +from plaso.lib import errors +from plaso.lib import event +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Joaquin Moreno Garijo (Joaquin.MorenoGarijo.2013@live.rhul.ac.uk)' + + +class UtmpxMacOsXEvent(event.EventObject): + """Convenience class for an event utmpx.""" + DATA_TYPE = 'mac:utmpx:event' + + def __init__(self, timestamp, user, terminal, status, computer_name): + """Initializes the event object. + + Args: + timestamp: when the terminal was started + user: active user name + terminal: name of the terminal + status: terminal status + computer_name: name of the host or IP. 
+ """
+ super(UtmpxMacOsXEvent, self).__init__()
+ self.timestamp = timestamp
+ self.timestamp_desc = eventdata.EventTimestamp.START_TIME
+ self.user = user
+ self.terminal = terminal
+ self.status = status
+ self.computer_name = computer_name
+
+
+class UtmpxParser(interface.BaseParser):
+ """Parser for UTMPX files."""
+
+ NAME = 'utmpx'
+ DESCRIPTION = u'Parser for UTMPX files.'
+
+ # INFO: Type is supposed to be a short (2 bytes), however if we analyze
+ # the file it is always one byte followed by 3 bytes with \x00 value.
+ MAC_UTMPX_ENTRY = construct.Struct(
+ 'utmpx_mac',
+ construct.String('user', 256),
+ construct.ULInt32('id'),
+ construct.String('tty_name', 32),
+ construct.ULInt32('pid'),
+ construct.ULInt16('status_type'),
+ construct.ULInt16('unknown'),
+ construct.ULInt32('timestamp'),
+ construct.ULInt32('microsecond'),
+ construct.String('hostname', 256),
+ construct.Padding(64))
+
+ MAC_UTMPX_ENTRY_SIZE = MAC_UTMPX_ENTRY.sizeof()
+
+ # 9, 10 and 11 are only for Darwin and iOS.
+ MAC_STATUS_TYPE = {
+ 0: 'EMPTY',
+ 1: 'RUN_LVL',
+ 2: 'BOOT_TIME',
+ 3: 'OLD_TIME',
+ 4: 'NEW_TIME',
+ 5: 'INIT_PROCESS',
+ 6: 'LOGIN_PROCESS',
+ 7: 'USER_PROCESS',
+ 8: 'DEAD_PROCESS',
+ 9: 'ACCOUNTING',
+ 10: 'SIGNATURE',
+ 11: 'SHUTDOWN_TIME'}
+
+ def _ReadEntry(self, file_object):
+ """Reads an UTMPX entry.
+
+ Args:
+ file_object: a file-like object that points to an UTMPX file.
+
+ Returns:
+ An event object constructed from the UTMPX entry.
+ """
+ data = file_object.read(self.MAC_UTMPX_ENTRY_SIZE)
+ if len(data) != self.MAC_UTMPX_ENTRY_SIZE:
+ return
+
+ try:
+ entry = self.MAC_UTMPX_ENTRY.parse(data)
+ except (IOError, construct.FieldError) as exception:
+ logging.warning(
+ u'Unable to parse Mac OS X UTMPX entry with error: {0:s}'.format(
+ exception))
+ return
+
+ user, _, _ = entry.user.partition('\x00')
+ if not user:
+ user = u'N/A'
+ terminal, _, _ = entry.tty_name.partition('\x00')
+ if not terminal:
+ terminal = u'N/A'
+ computer_name, _, _ = entry.hostname.partition('\x00')
+ if not computer_name:
+ computer_name = u'localhost'
+
+ value_status = self.MAC_STATUS_TYPE.get(entry.status_type, u'N/A')
+ status = u'{0}'.format(value_status)
+
+ timestamp = timelib.Timestamp.FromPosixTimeWithMicrosecond(
+ entry.timestamp, entry.microsecond)
+
+ return UtmpxMacOsXEvent(timestamp, user, terminal, status, computer_name)
+
+ def _VerifyStructure(self, file_object):
+ """Verify that we are dealing with an UTMPX entry.
+
+ Args:
+ file_object: a file-like object that points to an UTMPX file.
+
+ Returns:
+ True if it is a UTMPX entry or False otherwise.
+ """
+ # The first entry is the SIGNATURE entry of the file (the "header").
+ try:
+ header = self.MAC_UTMPX_ENTRY.parse_stream(file_object)
+ except (IOError, construct.FieldError):
+ return False
+ user, _, _ = header.user.partition('\x00')
+
+ # The UTMPX_ENTRY structure will often successfully compile on various
+ # structures, such as binary plist files, and thus we need to do some
+ # additional validation. The first one is to check if the user name
+ # can be converted into a unicode string, otherwise we can assume
+ # we are dealing with non UTMPX data.
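+ # The remaining checks below are grounded in the header record format:
+ # the user field of the first entry holds the literal signature
+ # 'utmpx-1.00', the status type must map to SIGNATURE, the timestamp,
+ # microsecond and pid values are expected to be zero, and the terminal
+ # and hostname fields are expected to be empty.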
+ try: + _ = unicode(user) + except UnicodeDecodeError: + return False + + if user != u'utmpx-1.00': + return False + if self.MAC_STATUS_TYPE[header.status_type] != 'SIGNATURE': + return False + if header.timestamp != 0 or header.microsecond != 0 or header.pid != 0: + return False + tty_name, _, _ = header.tty_name.partition('\x00') + hostname, _, _ = header.hostname.partition('\x00') + if tty_name or hostname: + return False + + return True + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a UTMPX file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + if not self._VerifyStructure(file_object): + file_object.close() + raise errors.UnableToParseFile( + u'The file is not an UTMPX file.') + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + event_object = self._ReadEntry(file_object) + while event_object: + event_object.offset = file_object.tell() + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + event_object = self._ReadEntry(file_object) + + file_object.close() + + +manager.ParsersManager.RegisterParser(UtmpxParser) diff --git a/plaso/parsers/utmpx_test.py b/plaso/parsers/utmpx_test.py new file mode 100644 index 0000000..3ff07bd --- /dev/null +++ b/plaso/parsers/utmpx_test.py @@ -0,0 +1,94 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for UTMPX file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import utmpx as utmpx_formatter +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import utmpx + + +class UtmpxParserTest(test_lib.ParserTestCase): + """Tests for utmpx file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = utmpx.UtmpxParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['utmpx_mac']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 6) + + event_object = event_objects[0] + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-13 17:52:34') + self.assertEqual(event_object.timestamp, expected_timestamp) + + expected_msg_short = u'User: N/A' + expected_msg = ( + u'User: N/A Status: BOOT_TIME ' + u'Computer Name: localhost Terminal: N/A') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-13 17:52:41.736713') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.user, u'moxilo') + self.assertEqual(event_object.terminal, u'console', ) + self.assertEqual(event_object.status, u'USER_PROCESS') + self.assertEqual(event_object.computer_name, u'localhost') + expected_msg = ( + u'User: moxilo Status: ' + u'USER_PROCESS ' + u'Computer Name: localhost ' + u'Terminal: console') + expected_msg_short = ( + u'User: moxilo') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[4] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-11-14 04:32:56.641464') + self.assertEqual(event_object.timestamp, expected_timestamp) + + self.assertEqual(event_object.user, u'moxilo') + self.assertEqual(event_object.terminal, u'ttys002') + self.assertEqual(event_object.status, u'DEAD_PROCESS') + expected_msg = ( + u'User: moxilo Status: ' + u'DEAD_PROCESS ' + u'Computer Name: localhost ' + u'Terminal: ttys002') + expected_msg_short = ( + u'User: moxilo') + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winevt.py b/plaso/parsers/winevt.py new file mode 100644 index 0000000..49fb492 --- /dev/null +++ b/plaso/parsers/winevt.py @@ -0,0 +1,192 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Parser for Windows EventLog (EVT) files.""" + +import logging + +import pyevt + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager + + +class WinEvtRecordEvent(time_events.PosixTimeEvent): + """Convenience class for a Windows EventLog (EVT) record event.""" + + DATA_TYPE = 'windows:evt:record' + + def __init__( + self, timestamp, timestamp_description, evt_record, recovered=False): + """Initializes the event. + + Args: + timestamp: The POSIX timestamp value. + timestamp_description: A description string for the timestamp value. + evt_record: The EVT record (pyevt.record). + recovered: Boolean value to indicate the record was recovered, False + by default. + """ + super(WinEvtRecordEvent, self).__init__(timestamp, timestamp_description) + + self.recovered = recovered + self.offset = evt_record.offset + + try: + self.record_number = evt_record.identifier + except OverflowError as exception: + logging.warning( + u'Unable to assign the record identifier with error: {0:s}.'.format( + exception)) + try: + event_identifier = evt_record.event_identifier + except OverflowError as exception: + event_identifier = None + logging.warning( + u'Unable to assign the event identifier with error: {0:s}.'.format( + exception)) + + # We are only interest in the event identifier code to match the behavior + # of EVTX event records. + if event_identifier is not None: + self.event_identifier = event_identifier & 0xffff + self.facility = (event_identifier >> 16) & 0x0fff + self.severity = event_identifier >> 30 + self.message_identifier = event_identifier + + self.event_type = evt_record.event_type + self.event_category = evt_record.event_category + self.source_name = evt_record.source_name + + # Computer name is the value stored in the event record and does not + # necessarily corresponds with the actual hostname. + self.computer_name = evt_record.computer_name + self.user_sid = evt_record.user_security_identifier + + self.strings = list(evt_record.strings) + + +class WinEvtParser(interface.BaseParser): + """Parses Windows EventLog (EVT) files.""" + + NAME = 'winevt' + DESCRIPTION = u'Parser for Windows EventLog (EVT) files.' + + def _ParseRecord( + self, parser_context, evt_record, file_entry=None, parser_chain=None, + recovered=False): + """Extract data from a Windows EventLog (EVT) record. + + Args: + parser_context: A parser context object (instance of ParserContext). + evt_record: An event record (pyevt.record). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + recovered: Boolean value to indicate the record was recovered, False + by default. 
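+
+ Note that a single EVT record can yield up to two event objects: one
+ for the creation time and one for the written time, each produced
+ separately below.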
+ """ + try: + creation_time = evt_record.get_creation_time_as_integer() + except OverflowError as exception: + logging.warning( + u'Unable to read the timestamp from record with error: {0:s}'.format( + exception)) + creation_time = 0 + + if creation_time: + event_object = WinEvtRecordEvent( + creation_time, eventdata.EventTimestamp.CREATION_TIME, + evt_record, recovered) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + try: + written_time = evt_record.get_written_time_as_integer() + except OverflowError as exception: + logging.warning( + u'Unable to read the timestamp from record with error: {0:s}'.format( + exception)) + written_time = 0 + + if written_time: + event_object = WinEvtRecordEvent( + written_time, eventdata.EventTimestamp.WRITTEN_TIME, + evt_record, recovered) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a Windows EventLog (EVT) file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. + """ + parser_chain = self._BuildParserChain(parser_chain) + + file_object = file_entry.GetFileObject() + evt_file = pyevt.file() + evt_file.set_ascii_codepage(parser_context.codepage) + + try: + evt_file.open_file_object(file_object) + except IOError as exception: + evt_file.close() + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + for record_index in range(0, evt_file.number_of_records): + try: + evt_record = evt_file.get_record(record_index) + self._ParseRecord( + parser_context, evt_record, file_entry=file_entry, + parser_chain=parser_chain) + except IOError as exception: + logging.warning(( + u'[{0:s}] unable to parse event record: {1:d} in file: {2:s} ' + u'with error: {3:s}').format( + self.NAME, record_index, file_entry.name, exception)) + + for record_index in range(0, evt_file.number_of_recovered_records): + try: + evt_record = evt_file.get_recovered_record(record_index) + self._ParseRecord( + parser_context, evt_record, file_entry=file_entry, + parser_chain=parser_chain, recovered=True) + except IOError as exception: + logging.info(( + u'[{0:s}] unable to parse recovered event record: {1:d} in file: ' + u'{2:s} with error: {3:s}').format( + self.NAME, record_index, file_entry.name, exception)) + + evt_file.close() + file_object.close() + + +manager.ParsersManager.RegisterParser(WinEvtParser) diff --git a/plaso/parsers/winevt_test.py b/plaso/parsers/winevt_test.py new file mode 100644 index 0000000..4972ecf --- /dev/null +++ b/plaso/parsers/winevt_test.py @@ -0,0 +1,120 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Windows EventLog (EVT) parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winevt as winevt_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import winevt + + +class WinEvtParserTest(test_lib.ParserTestCase): + """Tests for the Windows EventLog (EVT) parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = winevt.WinEvtParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['SysEvent.Evt']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # Windows Event Log (EVT) information: + # Version : 1.1 + # Number of records : 6063 + # Number of recovered records : 437 + # Log type : System + + self.assertEquals(len(event_objects), (6063 + 437) * 2) + + # Event number : 1392 + # Creation time : Jul 27, 2011 06:41:47 UTC + # Written time : Jul 27, 2011 06:41:47 UTC + # Event type : Warning event (2) + # Computer name : WKS-WINXP32BIT + # Source name : LSASRV + # Event category : 3 + # Event identifier : 0x8000a001 (2147524609) + # Number of strings : 2 + # String: 1 : cifs/CONTROLLER + # String: 2 : "The system detected a possible attempt to compromise + # security. Please ensure that you can contact the + # server that authenticated you.\r\n (0xc0000388)" + event_object = event_objects[1] + self.assertEquals(event_object.record_number, 1392) + self.assertEquals(event_object.event_type, 2) + self.assertEquals(event_object.computer_name, u'WKS-WINXP32BIT') + self.assertEquals(event_object.source_name, u'LSASRV') + self.assertEquals(event_object.event_category, 3) + self.assertEquals(event_object.event_identifier, 40961) + self.assertEquals(event_object.strings[0], u'cifs/CONTROLLER') + + expected_string = ( + u'"The system detected a possible attempt to compromise security. ' + u'Please ensure that you can contact the server that authenticated you.' + u'\r\n (0xc0000388)"') + + self.assertEquals(event_object.strings[1], expected_string) + + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-07-27 06:41:47') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-07-27 06:41:47') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.WRITTEN_TIME) + + expected_msg = ( + u'[40961 / 0xa001] ' + u'Severity: Warning ' + u'Record Number: 1392 ' + u'Event Type: Information event ' + u'Event Category: 3 ' + u'Source Name: LSASRV ' + u'Computer Name: WKS-WINXP32BIT ' + u'Strings: [u\'cifs/CONTROLLER\', ' + u'u\'"The system detected a possible attempt to ' + u'compromise security. 
Please ensure that you can ' + u'contact the server that authenticated you.\\r\\n ' + u'(0xc0000388)"\']') + + expected_msg_short = ( + u'[40961 / 0xa001] ' + u'Strings: [u\'cifs/CONTROLLER\', ' + u'u\'"The system detected a possi...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winevtx.py b/plaso/parsers/winevtx.py new file mode 100644 index 0000000..5f59731 --- /dev/null +++ b/plaso/parsers/winevtx.py @@ -0,0 +1,165 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Windows XML EventLog (EVTX) files.""" + +import logging + +import pyevtx + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager + + +if pyevtx.get_version() < '20141112': + raise ImportWarning('WinEvtxParser requires at least pyevtx 20141112.') + + +class WinEvtxRecordEvent(time_events.FiletimeEvent): + """Convenience class for a Windows XML EventLog (EVTX) record event.""" + DATA_TYPE = 'windows:evtx:record' + + def __init__(self, evtx_record, recovered=False): + """Initializes the event. + + Args: + evtx_record: The EVTX record (pyevtx.record). + recovered: Boolean value to indicate the record was recovered, False + by default. + """ + try: + timestamp = evtx_record.get_written_time_as_integer() + except OverflowError as exception: + logging.warning( + u'Unable to read the timestamp from record with error: {0:s}'.format( + exception)) + timestamp = 0 + + super(WinEvtxRecordEvent, self).__init__( + timestamp, eventdata.EventTimestamp.WRITTEN_TIME) + + self.recovered = recovered + self.offset = evtx_record.offset + + try: + self.record_number = evtx_record.identifier + except OverflowError as exception: + logging.warning( + u'Unable to assign the record number with error: {0:s}.'.format( + exception)) + + try: + event_identifier = evtx_record.event_identifier + except OverflowError as exception: + event_identifier = None + logging.warning( + u'Unable to assign the event identifier with error: {0:s}.'.format( + exception)) + + try: + event_identifier_qualifiers = evtx_record.event_identifier_qualifiers + except OverflowError as exception: + event_identifier_qualifiers = None + logging.warning(( + u'Unable to assign the event identifier qualifiers with error: ' + u'{0:s}.').format(exception)) + + if event_identifier is not None: + self.event_identifier = event_identifier + + if event_identifier_qualifiers is not None: + self.message_identifier = ( + (event_identifier_qualifiers << 16) | event_identifier) + + self.event_level = evtx_record.event_level + self.source_name = evtx_record.source_name + + # Computer name is the value stored in the event record and does not + # necessarily corresponds with the actual hostname. 
+ self.computer_name = evtx_record.computer_name + self.user_sid = evtx_record.user_security_identifier + + self.strings = list(evtx_record.strings) + + self.xml_string = evtx_record.xml_string + + +class WinEvtxParser(interface.BaseParser): + """Parses Windows XML EventLog (EVTX) files.""" + + NAME = 'winevtx' + DESCRIPTION = u'Parser for Windows XML EventLog (EVTX) files.' + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a Windows XML EventLog (EVTX) file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. + """ + parser_chain = self._BuildParserChain(parser_chain) + + file_object = file_entry.GetFileObject() + evtx_file = pyevtx.file() + evtx_file.set_ascii_codepage(parser_context.codepage) + + try: + evtx_file.open_file_object(file_object) + except IOError as exception: + evtx_file.close() + file_object.close() + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( + self.NAME, file_entry.name, exception)) + + for record_index in range(0, evtx_file.number_of_records): + try: + evtx_record = evtx_file.get_record(record_index) + event_object = WinEvtxRecordEvent(evtx_record) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + except IOError as exception: + logging.warning(( + u'[{0:s}] unable to parse event record: {1:d} in file: {2:s} ' + u'with error: {3:s}').format( + self.NAME, record_index, file_entry.name, exception)) + + for record_index in range(0, evtx_file.number_of_recovered_records): + try: + evtx_record = evtx_file.get_recovered_record(record_index) + event_object = WinEvtxRecordEvent(evtx_record, recovered=True) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + except IOError as exception: + logging.debug(( + u'[{0:s}] unable to parse recovered event record: {1:d} in file: ' + u'{2:s} with error: {3:s}').format( + self.NAME, record_index, file_entry.name, exception)) + + evtx_file.close() + file_object.close() + + +manager.ParsersManager.RegisterParser(WinEvtxParser) diff --git a/plaso/parsers/winevtx_test.py b/plaso/parsers/winevtx_test.py new file mode 100644 index 0000000..a980893 --- /dev/null +++ b/plaso/parsers/winevtx_test.py @@ -0,0 +1,138 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
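+# Editor's note: the following sketch is illustrative and not part of the
+# original change. It drives the same pyevtx calls WinEvtxParser above uses
+# (number_of_records, get_record) to list records outside of plaso; the
+# helper name ListEvtxRecords is hypothetical.
+import pyevtx
+
+
+def ListEvtxRecords(path):
+  """Prints written time, source and identifier for each EVTX record."""
+  evtx_file = pyevtx.file()
+  evtx_file.open(path)
+  try:
+    for record_index in range(0, evtx_file.number_of_records):
+      evtx_record = evtx_file.get_record(record_index)
+      print u'{0:d}\t{1:s}\t{2:d}'.format(
+          evtx_record.get_written_time_as_integer(),
+          evtx_record.source_name, evtx_record.event_identifier)
+  finally:
+    evtx_file.close()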
+"""Tests for the Windows XML EventLog (EVTX) parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winevtx as winevtx_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import winevtx + + +class WinEvtxParserTest(test_lib.ParserTestCase): + """Tests for the Windows XML EventLog (EVTX) parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = winevtx.WinEvtxParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['System.evtx']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # Windows Event Viewer Log (EVTX) information: + # Version : 3.1 + # Number of records : 1601 + # Number of recovered records : 0 + # Log type : System + + self.assertEquals(len(event_objects), 1601) + + # Event number : 12049 + # Written time : Mar 14, 2012 04:17:43.354562700 UTC + # Event level : Information (4) + # Computer name : WKS-WIN764BITB.shieldbase.local + # Provider identifier : {fc65ddd8-d6ef-4962-83d5-6e5cfe9ce148} + # Source name : Microsoft-Windows-Eventlog + # Event identifier : 0x00000069 (105) + # Number of strings : 2 + # String: 1 : System + # String: 2 : C:\Windows\System32\Winevt\Logs\ + # : Archive-System-2012-03-14-04-17-39-932.evtx + + event_object = event_objects[0] + + self.assertEquals(event_object.record_number, 12049) + expected_computer_name = u'WKS-WIN764BITB.shieldbase.local' + self.assertEquals(event_object.computer_name, expected_computer_name) + self.assertEquals(event_object.source_name, u'Microsoft-Windows-Eventlog') + self.assertEquals(event_object.event_level, 4) + self.assertEquals(event_object.event_identifier, 105) + + self.assertEquals(event_object.strings[0], u'System') + + expected_string = ( + u'C:\\Windows\\System32\\Winevt\\Logs\\' + u'Archive-System-2012-03-14-04-17-39-932.evtx') + + self.assertEquals(event_object.strings[1], expected_string) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-14 04:17:38.276340') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.WRITTEN_TIME) + + expected_xml_string = ( + u'\n' + u' \n' + u' \n' + u' 7036\n' + u' 0\n' + u' 4\n' + u' 0\n' + u' 0\n' + u' 0x8080000000000000\n' + u' \n' + u' 12050\n' + u' \n' + u' \n' + u' System\n' + u' WKS-WIN764BITB.shieldbase.local\n' + u' \n' + u' \n' + u' \n' + u' Windows Modules Installer\n' + u' stopped\n' + u' 540072007500730074006500640049006E007300740061006C006C00' + u'650072002F0031000000\n' + u' \n' + u'\n') + + self.assertEquals(event_object.xml_string, expected_xml_string) + + expected_msg = ( + u'[7036 / 0x1b7c] ' + u'Record Number: 12050 ' + u'Event Level: 4 ' + u'Source Name: Service Control Manager ' + u'Computer Name: WKS-WIN764BITB.shieldbase.local ' + u'Strings: [u\'Windows Modules Installer\', ' + u'u\'stopped\', u\'540072007500730074006500640049006E00' + u'7300740061006C006C00650072002F0031000000\']') + + expected_msg_short = ( + u'[7036 / 0x1b7c] ' + u'Strings: [u\'Windows Modules Installer\', ' + u'u\'stopped\', u\'5400720...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winfirewall.py 
b/plaso/parsers/winfirewall.py new file mode 100644 index 0000000..90adb1b --- /dev/null +++ b/plaso/parsers/winfirewall.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Windows Firewall Log file.""" + +import logging + +import pyparsing + +from plaso.events import time_events +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import manager +from plaso.parsers import text_parser + +import pytz + + +class WinFirewallParser(text_parser.PyparsingSingleLineTextParser): + """Parses the Windows Firewall Log file. + + More information can be read here: + http://technet.microsoft.com/en-us/library/cc758040(v=ws.10).aspx + """ + + NAME = 'winfirewall' + DESCRIPTION = u'Parser for Windows Firewall Log files.' + + # TODO: Add support for custom field names. Currently this parser only + # supports the default fields, which are: + # date time action protocol src-ip dst-ip src-port dst-port size + # tcpflags tcpsyn tcpack tcpwin icmptype icmpcode info path + + # Define common structures. + BLANK = pyparsing.Literal('-') + WORD = pyparsing.Word(pyparsing.alphanums + '-') | BLANK + INT = pyparsing.Word(pyparsing.nums, min=1) | BLANK + IP = ( + text_parser.PyparsingConstants.IPV4_ADDRESS | + text_parser.PyparsingConstants.IPV6_ADDRESS | BLANK) + PORT = pyparsing.Word(pyparsing.nums, min=1, max=6) | BLANK + + # Define how a log line should look like. + LOG_LINE = ( + text_parser.PyparsingConstants.DATE.setResultsName('date') + + text_parser.PyparsingConstants.TIME.setResultsName('time') + + WORD.setResultsName('action') + WORD.setResultsName('protocol') + + IP.setResultsName('source_ip') + IP.setResultsName('dest_ip') + + PORT.setResultsName('source_port') + INT.setResultsName('dest_port') + + INT.setResultsName('size') + WORD.setResultsName('flags') + + INT.setResultsName('tcp_seq') + INT.setResultsName('tcp_ack') + + INT.setResultsName('tcp_win') + INT.setResultsName('icmp_type') + + INT.setResultsName('icmp_code') + WORD.setResultsName('info') + + WORD.setResultsName('path')) + + # Define the available log line structures. + LINE_STRUCTURES = [ + ('comment', text_parser.PyparsingConstants.COMMENT_LINE_HASH), + ('logline', LOG_LINE), + ] + + DATA_TYPE = 'windows:firewall:log_entry' + + def __init__(self): + """Initializes a parser object.""" + super(WinFirewallParser, self).__init__() + self.version = None + self.use_local_zone = False + self.software = None + + def VerifyStructure(self, parser_context, line): + """Verify that this file is a firewall log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + line: A single line from the text file. + + Returns: + True if this is the correct parser, False otherwise. + """ + # TODO: Examine other versions of the file format and if this parser should + # support them. 
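+    # Editor's note: the sample below is illustrative and not part of the
+    # original change. A default-format Windows Firewall log begins with
+    # comment lines and carries space-separated fields, for example:
+    #
+    #   #Version: 1.5
+    #   #Software: Microsoft Windows Firewall
+    #   #Time Format: Local
+    #   #Fields: date time action protocol src-ip dst-ip src-port dst-port
+    #     size tcpflags tcpsyn tcpack tcpwin icmptype icmpcode info path
+    #   2005-04-11 08:06:02 DROP TCP 123.45.78.90 123.156.78.90 80 1774 576
+    #     A 123456789 987654321 12345 - - - RECEIVE
+    #
+    # (Field values taken from the expectations in winfirewall_test.py
+    # below; real log lines are not wrapped.) The version comment is what
+    # this check keys on; LOG_LINE maps the positional fields to named
+    # results such as 'source_ip', and '-' matches the BLANK literal.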
+    if line == '#Version: 1.5':
+      return True
+
+    return False
+
+  def ParseRecord(self, parser_context, key, structure):
+    """Parse each record structure and return an event object if applicable.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: An identification string indicating the name of the parsed
+           structure.
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+
+    Returns:
+      An event object (instance of EventObject) or None.
+    """
+    if key == 'comment':
+      self._ParseCommentRecord(structure)
+    elif key == 'logline':
+      return self._ParseLogLine(parser_context, structure)
+    else:
+      logging.warning(
+          u'Unable to parse record, unknown structure: {0:s}'.format(key))
+
+  def _ParseCommentRecord(self, structure):
+    """Parse a comment and store appropriate attributes.
+
+    Args:
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+    """
+    comment = structure[1]
+    if comment.startswith('Version'):
+      _, _, self.version = comment.partition(':')
+    elif comment.startswith('Software'):
+      _, _, self.software = comment.partition(':')
+    elif comment.startswith('Time'):
+      _, _, time_format = comment.partition(':')
+      if 'local' in time_format.lower():
+        self.use_local_zone = True
+
+  def _ParseLogLine(self, parser_context, structure):
+    """Parse a single log line and return an event object.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+
+    Returns:
+      An event object (instance of EventObject) or None.
+    """
+    log_dict = structure.asDict()
+
+    date = log_dict.get('date', None)
+    time = log_dict.get('time', None)
+
+    if not (date and time):
+      logging.warning(u'Unable to extract timestamp from Winfirewall logline.')
+      return
+
+    year, month, day = date
+    hour, minute, second = time
+    if self.use_local_zone:
+      zone = parser_context.timezone
+    else:
+      zone = pytz.utc
+
+    timestamp = timelib.Timestamp.FromTimeParts(
+        year, month, day, hour, minute, second, timezone=zone)
+
+    if not timestamp:
+      return
+
+    # TODO: refactor this into a WinFirewall specific event object.
+    event_object = time_events.TimestampEvent(
+        timestamp, eventdata.EventTimestamp.WRITTEN_TIME, self.DATA_TYPE)
+
+    for key, value in log_dict.items():
+      if key in ('time', 'date'):
+        continue
+      if value == '-':
+        continue
+
+      if isinstance(value, pyparsing.ParseResults):
+        setattr(event_object, key, ''.join(value))
+      else:
+        try:
+          save_value = int(value)
+        except ValueError:
+          save_value = value
+
+        setattr(event_object, key, save_value)
+
+    return event_object
+
+
+manager.ParsersManager.RegisterParser(WinFirewallParser)
diff --git a/plaso/parsers/winfirewall_test.py b/plaso/parsers/winfirewall_test.py
new file mode 100644
index 0000000..382c281
--- /dev/null
+++ b/plaso/parsers/winfirewall_test.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Windows firewall log parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winfirewall as winfirewall_formatter +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import winfirewall + + +class WinFirewallParserTest(test_lib.ParserTestCase): + """Tests for the Windows firewall log parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = winfirewall.WinFirewallParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['firewall.log']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 15) + + event_object = event_objects[4] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2005-04-11 08:06:02') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.source_ip, '123.45.78.90') + self.assertEquals(event_object.dest_ip, '123.156.78.90') + + event_object = event_objects[7] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2005-04-11 08:06:26') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.size, 576) + self.assertEquals(event_object.flags, 'A') + self.assertEquals(event_object.tcp_ack, 987654321) + + expected_msg = ( + u'DROP [ TCP RECEIVE ] ' + u'From: 123.45.78.90 :80 > 123.156.78.90 :1774 ' + u'Size (bytes): 576 ' + u'Flags [A] ' + u'TCP Seq Number: 123456789 ' + u'TCP ACK Number: 987654321 ' + u'TCP Window Size (bytes): 12345') + expected_msg_short = ( + u'DROP [TCP] 123.45.78.90 : 80 > 123.156.78.90 : 1774') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[9] + + self.assertEquals(event_object.icmp_type, 8) + self.assertEquals(event_object.icmp_code, 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winjob.py b/plaso/parsers/winjob.py new file mode 100644 index 0000000..d9e3ef5 --- /dev/null +++ b/plaso/parsers/winjob.py @@ -0,0 +1,270 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
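+# Editor's note: an illustrative sketch, not part of the original change.
+# The parser below stores strings as raw UTF-16 little-endian bytes and
+# relies on plaso's binary.ReadUtf16() helper; the hypothetical stand-in
+# here only approximates that helper's behavior.
+def ReadUtf16Sketch(byte_stream):
+  """Decodes UTF-16-LE bytes, stopping at the first NUL terminator."""
+  string = byte_stream.decode('utf-16-le', 'ignore')
+  string, _, _ = string.partition(u'\x00')
+  return string
+
+# Example: two UTF-16-LE code units plus a terminator decode to u'Hi'.
+assert ReadUtf16Sketch(b'H\x00i\x00\x00\x00') == u'Hi'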
+"""Parser for Windows Scheduled Task job files.""" + +import construct + +from plaso.events import time_events +from plaso.lib import binary +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import interface +from plaso.parsers import manager + + +__author__ = 'Brian Baskin (brian@thebaskins.com)' + + +class WinJobEvent(time_events.TimestampEvent): + """Convenience class for a Windows Scheduled Task event.""" + + DATA_TYPE = 'windows:tasks:job' + + def __init__( + self, timestamp, timestamp_description, application, parameter, + working_dir, username, trigger, comment): + """Initializes the event object. + + Args: + timestamp: The timestamp value. + timestamp_description: The usage string for the timestamp value. + application: Path to job executable. + parameter: Application command line parameters. + working_dir: Working path for task. + username: User job was scheduled from. + trigger: Trigger event that runs the task, e.g. DAILY. + comment: Optional description about the job. + """ + super(WinJobEvent, self).__init__(timestamp, timestamp_description) + self.application = binary.ReadUtf16(application) + self.parameter = binary.ReadUtf16(parameter) + self.working_dir = binary.ReadUtf16(working_dir) + self.username = binary.ReadUtf16(username) + self.trigger = trigger + self.comment = binary.ReadUtf16(comment) + + +class WinJobParser(interface.BaseParser): + """Parse Windows Scheduled Task files for job events.""" + + NAME = 'winjob' + DESCRIPTION = u'Parser for Windows Scheduled Task job (or At-job) files.' + + PRODUCT_VERSIONS = { + 0x0400:'Windows NT 4.0', + 0x0500:'Windows 2000', + 0x0501:'Windows XP', + 0x0600:'Windows Vista', + 0x0601:'Windows 7', + 0x0602:'Windows 8', + 0x0603:'Windows 8.1' + } + + TRIGGER_TYPES = { + 0x0000:'ONCE', + 0x0001:'DAILY', + 0x0002:'WEEKLY', + 0x0003:'MONTHLYDATE', + 0x0004:'MONTHLYDOW', + 0x0005:'EVENT_ON_IDLE', + 0x0006:'EVENT_AT_SYSTEMSTART', + 0x0007:'EVENT_AT_LOGON' + } + + JOB_FIXED_STRUCT = construct.Struct( + 'job_fixed', + construct.ULInt16('product_version'), + construct.ULInt16('file_version'), + construct.Bytes('job_uuid', 16), + construct.ULInt16('app_name_len_offset'), + construct.ULInt16('trigger_offset'), + construct.ULInt16('error_retry_count'), + construct.ULInt16('error_retry_interval'), + construct.ULInt16('idle_deadline'), + construct.ULInt16('idle_wait'), + construct.ULInt32('priority'), + construct.ULInt32('max_run_time'), + construct.ULInt32('exit_code'), + construct.ULInt32('status'), + construct.ULInt32('flags'), + construct.ULInt16('ran_year'), + construct.ULInt16('ran_month'), + construct.ULInt16('ran_weekday'), + construct.ULInt16('ran_day'), + construct.ULInt16('ran_hour'), + construct.ULInt16('ran_minute'), + construct.ULInt16('ran_second'), + construct.ULInt16('ran_millisecond'), + ) + + # Using Construct's utf-16 encoding here will create strings with their + # null terminators exposed. Instead, we'll read these variables raw and + # convert them using Plaso's ReadUtf16() for proper formatting. 
+  JOB_VARIABLE_STRUCT = construct.Struct(
+      'job_variable',
+      construct.ULInt16('running_instance_count'),
+      construct.ULInt16('app_name_len'),
+      construct.String(
+          'app_name',
+          lambda ctx: ctx.app_name_len * 2),
+      construct.ULInt16('parameter_len'),
+      construct.String(
+          'parameter',
+          lambda ctx: ctx.parameter_len * 2),
+      construct.ULInt16('working_dir_len'),
+      construct.String(
+          'working_dir',
+          lambda ctx: ctx.working_dir_len * 2),
+      construct.ULInt16('username_len'),
+      construct.String(
+          'username',
+          lambda ctx: ctx.username_len * 2),
+      construct.ULInt16('comment_len'),
+      construct.String(
+          'comment',
+          lambda ctx: ctx.comment_len * 2),
+      construct.ULInt16('userdata_len'),
+      construct.String(
+          'userdata',
+          lambda ctx: ctx.userdata_len),
+      construct.ULInt16('reserved_len'),
+      construct.String(
+          'reserved',
+          lambda ctx: ctx.reserved_len),
+      construct.ULInt16('test'),
+      construct.ULInt16('trigger_size'),
+      construct.ULInt16('trigger_reserved1'),
+      construct.ULInt16('sched_start_year'),
+      construct.ULInt16('sched_start_month'),
+      construct.ULInt16('sched_start_day'),
+      construct.ULInt16('sched_end_year'),
+      construct.ULInt16('sched_end_month'),
+      construct.ULInt16('sched_end_day'),
+      construct.ULInt16('sched_start_hour'),
+      construct.ULInt16('sched_start_minute'),
+      construct.ULInt32('sched_duration'),
+      construct.ULInt32('sched_interval'),
+      construct.ULInt32('trigger_flags'),
+      construct.ULInt32('trigger_type'),
+      construct.ULInt16('trigger_arg0'),
+      construct.ULInt16('trigger_arg1'),
+      construct.ULInt16('trigger_arg2'),
+      construct.ULInt16('trigger_padding'),
+      construct.ULInt16('trigger_reserved2'),
+      construct.ULInt16('trigger_reserved3'))
+
+  def Parse(self, parser_context, file_entry, parser_chain=None):
+    """Extract data from a Windows job file.
+
+    This is the main parsing engine for the parser. It determines if
+    the selected file is a proper Scheduled task job file and extracts
+    the scheduled task data.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Raises:
+      UnableToParseFile: when the file cannot be parsed.
+    """
+    file_object = file_entry.GetFileObject()
+    try:
+      header = self.JOB_FIXED_STRUCT.parse_stream(file_object)
+    except (IOError, construct.FieldError) as exception:
+      raise errors.UnableToParseFile(
+          u'Unable to parse Windows Task Job file with error: {0:s}'.format(
+              exception))
+
+    if header.product_version not in self.PRODUCT_VERSIONS:
+      raise errors.UnableToParseFile(u'Not a valid Scheduled Task file')
+
+    if header.file_version != 1:
+      raise errors.UnableToParseFile(u'Not a valid Scheduled Task file')
+
+    # Obtain the relevant values from the file.
+    try:
+      data = self.JOB_VARIABLE_STRUCT.parse_stream(file_object)
+    except (IOError, construct.FieldError) as exception:
+      raise errors.UnableToParseFile(
+          u'Unable to parse Windows Task Job file with error: {0:s}'.format(
+              exception))
+
+    # Add ourselves to the parser chain, which will be used in all subsequent
+    # event creation in this parser.
+    parser_chain = self._BuildParserChain(parser_chain)
+
+    trigger_type = self.TRIGGER_TYPES.get(data.trigger_type, u'Unknown')
+
+    last_run_date = timelib.Timestamp.FromTimeParts(
+        header.ran_year,
+        header.ran_month,
+        header.ran_day,
+        header.ran_hour,
+        header.ran_minute,
+        header.ran_second,
+        microseconds=(header.ran_millisecond * 1000),
+        timezone=parser_context.timezone)
+
+    scheduled_date = timelib.Timestamp.FromTimeParts(
+        data.sched_start_year,
+        data.sched_start_month,
+        data.sched_start_day,
+        data.sched_start_hour,
+        data.sched_start_minute,
+        0,  # Seconds are not stored.
+        timezone=parser_context.timezone)
+
+    # Create two timeline events, one for the last run time and one for the
+    # scheduled start time.
+    parser_context.ProduceEvents(
+        [WinJobEvent(
+            last_run_date, eventdata.EventTimestamp.LAST_RUNTIME,
+            data.app_name, data.parameter, data.working_dir, data.username,
+            trigger_type, data.comment),
+         WinJobEvent(
+             scheduled_date, u'Scheduled To Start', data.app_name,
+             data.parameter, data.working_dir, data.username, trigger_type,
+             data.comment)],
+        parser_chain=parser_chain, file_entry=file_entry)
+
+    # A scheduled end date is optional.
+    if data.sched_end_year:
+      scheduled_end_date = timelib.Timestamp.FromTimeParts(
+          data.sched_end_year,
+          data.sched_end_month,
+          data.sched_end_day,
+          0,  # Hours are not stored.
+          0,  # Minutes are not stored.
+          0,  # Seconds are not stored.
+          timezone=parser_context.timezone)
+
+      event_object = WinJobEvent(
+          scheduled_end_date, u'Scheduled To End', data.app_name,
+          data.parameter, data.working_dir, data.username, trigger_type,
+          data.comment)
+      parser_context.ProduceEvent(
+          event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    file_object.close()
+
+
+manager.ParsersManager.RegisterParser(WinJobParser)
diff --git a/plaso/parsers/winjob_test.py b/plaso/parsers/winjob_test.py
new file mode 100644
index 0000000..c5af40b
--- /dev/null
+++ b/plaso/parsers/winjob_test.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
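+# Editor's note: an illustrative sketch, not part of the original change.
+# It reuses the JOB_FIXED_STRUCT defined on WinJobParser above to inspect
+# the fixed-size header of a .job file outside of plaso; PrintJobHeader is
+# a hypothetical helper name.
+from plaso.parsers import winjob
+
+
+def PrintJobHeader(path):
+  """Prints the Windows version and last run date of a .job file."""
+  with open(path, 'rb') as file_object:
+    header = winjob.WinJobParser.JOB_FIXED_STRUCT.parse_stream(file_object)
+  print u'{0:s}, last ran {1:04d}-{2:02d}-{3:02d}'.format(
+      winjob.WinJobParser.PRODUCT_VERSIONS.get(
+          header.product_version, u'Unknown Windows version'),
+      header.ran_year, header.ran_month, header.ran_day)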
+"""Tests for the Windows Scheduled Task job file parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winjob as winjob_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import winjob + + +class WinJobTest(test_lib.ParserTestCase): + """Tests for the Windows Scheduled Task job file parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = winjob.WinJobParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['wintask.job']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + application_expected = ( + u'C:\\Program Files (x86)\\Google\\Update\\GoogleUpdate.exe') + self.assertEqual(event_object.application, application_expected) + + username_expected = u'Brian' + self.assertEqual(event_object.username, username_expected) + + description_expected = eventdata.EventTimestamp.LAST_RUNTIME + self.assertEqual(event_object.timestamp_desc, description_expected) + + trigger_expected = u'DAILY' + self.assertEqual(event_object.trigger, trigger_expected) + + comment_expected = ( + u'Keeps your Google software up to date. If this task is disabled or ' + u'stopped, your Google software will not be kept up to date, meaning ' + u'security vulnerabilities that may arise cannot be fixed and ' + u'features may not work. This task uninstalls itself when there is ' + u'no Google software using it.') + self.assertEqual(event_object.comment, comment_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-08-24 12:42:00.112') + self.assertEqual(event_object.timestamp, expected_timestamp) + + # Parse second event. Same metadata; different timestamp event. + event_object = event_objects[1] + + self.assertEqual(event_object.application, application_expected) + self.assertEqual(event_object.username, username_expected) + self.assertEqual(event_object.trigger, trigger_expected) + self.assertEqual(event_object.comment, comment_expected) + + description_expected = u'Scheduled To Start' + self.assertEqual(event_object.timestamp_desc, description_expected) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-12 15:42:00') + self.assertEqual(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Application: C:\\Program Files (x86)\\Google\\Update\\' + u'GoogleUpdate.exe /ua /installsource scheduler ' + u'Scheduled by: Brian ' + u'Run Iteration: DAILY') + + expected_msg_short = ( + u'Application: C:\\Program Files (x86)\\Google\\Update\\' + u'GoogleUpdate.exe /ua /insta...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winlnk.py b/plaso/parsers/winlnk.py new file mode 100644 index 0000000..0e4d4c3 --- /dev/null +++ b/plaso/parsers/winlnk.py @@ -0,0 +1,158 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Windows Shortcut (LNK) files.""" + +import pylnk + +from plaso.events import time_events +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager +from plaso.parsers.shared import shell_items + + +if pylnk.get_version() < '20141026': + raise ImportWarning('WinLnkParser requires at least pylnk 20141026.') + + +class WinLnkLinkEvent(time_events.FiletimeEvent): + """Convenience class for a Windows Shortcut (LNK) link event.""" + + DATA_TYPE = 'windows:lnk:link' + + def __init__(self, timestamp, timestamp_description, lnk_file, link_target): + """Initializes the event object. + + Args: + timestamp: The FILETIME value for the timestamp. + timestamp_description: The usage string for the timestamp value. + lnk_file: The LNK file (instance of pylnk.file). + link_target: String representation of the link target shell item list + or None. + """ + super(WinLnkLinkEvent, self).__init__(timestamp, timestamp_description) + + self.offset = 0 + self.file_size = lnk_file.file_size + self.file_attribute_flags = lnk_file.file_attribute_flags + self.drive_type = lnk_file.drive_type + self.drive_serial_number = lnk_file.drive_serial_number + self.description = lnk_file.description + self.volume_label = lnk_file.volume_label + self.local_path = lnk_file.local_path + self.network_path = lnk_file.network_path + self.command_line_arguments = lnk_file.command_line_arguments + self.env_var_location = lnk_file.environment_variables_location + self.relative_path = lnk_file.relative_path + self.working_directory = lnk_file.working_directory + self.icon_location = lnk_file.icon_location + + if link_target: + self.link_target = link_target + + +class WinLnkParser(interface.BaseParser): + """Parses Windows Shortcut (LNK) files.""" + + NAME = 'lnk' + DESCRIPTION = u'Parser for Windows Shortcut (LNK) files.' + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a Windows Shortcut (LNK) file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + file_object = file_entry.GetFileObject() + self.ParseFileObject( + parser_context, file_object, file_entry=file_entry, + parser_chain=parser_chain) + file_object.close() + + def ParseFileObject( + self, parser_context, file_object, file_entry=None, parser_chain=None, + display_name=None): + """Parses a Windows Shortcut (LNK) file. + + The file entry is used to determine the display name if it was not provided. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_object: A file-like object. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + display_name: Optional display name. + + Raises: + UnableToParseFile: when the file cannot be parsed. 
+ """ + parser_chain = self._BuildParserChain(parser_chain) + + if not display_name and file_entry: + display_name = parser_context.GetDisplayName(file_entry) + + lnk_file = pylnk.file() + lnk_file.set_ascii_codepage(parser_context.codepage) + + try: + lnk_file.open_file_object(file_object) + except IOError as exception: + raise errors.UnableToParseFile( + u'[{0:s}] unable to parse file {1:s} with error: {2:s}'.format( + self.NAME, display_name, exception)) + + link_target = None + if lnk_file.link_target_identifier_data: + # TODO: change file_entry.name to display name once it is generated + # correctly. + if file_entry: + display_name = file_entry.name + + shell_items_parser = shell_items.ShellItemsParser(display_name) + shell_items_parser.Parse( + parser_context, lnk_file.link_target_identifier_data, + codepage=parser_context.codepage, file_entry=file_entry, + parser_chain=parser_chain) + + link_target = shell_items_parser.CopyToPath() + + parser_context.ProduceEvents( + [WinLnkLinkEvent( + lnk_file.get_file_access_time_as_integer(), + eventdata.EventTimestamp.ACCESS_TIME, lnk_file, link_target), + WinLnkLinkEvent( + lnk_file.get_file_creation_time_as_integer(), + eventdata.EventTimestamp.CREATION_TIME, lnk_file, link_target), + WinLnkLinkEvent( + lnk_file.get_file_modification_time_as_integer(), + eventdata.EventTimestamp.MODIFICATION_TIME, lnk_file, + link_target)], + parser_chain=parser_chain, file_entry=file_entry) + + # TODO: add support for the distributed link tracker. + + lnk_file.close() + + +manager.ParsersManager.RegisterParser(WinLnkParser) diff --git a/plaso/parsers/winlnk_test.py b/plaso/parsers/winlnk_test.py new file mode 100644 index 0000000..14c4017 --- /dev/null +++ b/plaso/parsers/winlnk_test.py @@ -0,0 +1,170 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the Windows Shortcut (LNK) parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winlnk as winlnk_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import winlnk + + +class WinLnkParserTest(test_lib.ParserTestCase): + """Tests for the Windows Shortcut (LNK) parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = winlnk.WinLnkParser() + + def testParse(self): + """Tests the Parse function.""" + test_file = self._GetTestFilePath(['example.lnk']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + # Link information: + # Creation time : Jul 13, 2009 23:29:02.849131000 UTC + # Modification time : Jul 14, 2009 01:39:18.220000000 UTC + # Access time : Jul 13, 2009 23:29:02.849131000 UTC + # Description : @%windir%\system32\migwiz\wet.dll,-590 + # Relative path : .\migwiz\migwiz.exe + # Working directory : %windir%\system32\migwiz + # Icon location : %windir%\system32\migwiz\migwiz.exe + # Environment variables location : %windir%\system32\migwiz\migwiz.exe + + self.assertEqual(len(event_objects), 3) + + # A shortcut event object. + event_object = event_objects[0] + + expected_string = u'@%windir%\\system32\\migwiz\\wet.dll,-590' + self.assertEquals(event_object.description, expected_string) + + expected_string = u'.\\migwiz\\migwiz.exe' + self.assertEquals(event_object.relative_path, expected_string) + + expected_string = u'%windir%\\system32\\migwiz' + self.assertEquals(event_object.working_directory, expected_string) + + expected_string = u'%windir%\\system32\\migwiz\\migwiz.exe' + self.assertEquals(event_object.icon_location, expected_string) + self.assertEquals(event_object.env_var_location, expected_string) + + # The last accessed timestamp. + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-07-13 23:29:02.849131') + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.ACCESS_TIME) + self.assertEquals(event_object.timestamp, expected_timestamp) + + # The creation timestamp. + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-07-13 23:29:02.849131') + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + self.assertEquals(event_object.timestamp, expected_timestamp) + + # The last modification timestamp. 
+ event_object = event_objects[2] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-07-14 01:39:18.220000') + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.MODIFICATION_TIME) + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[@%windir%\\system32\\migwiz\\wet.dll,-590] ' + u'File size: 544768 ' + u'File attribute flags: 0x00000020 ' + u'env location: %windir%\\system32\\migwiz\\migwiz.exe ' + u'Relative path: .\\migwiz\\migwiz.exe ' + u'Working dir: %windir%\\system32\\migwiz ' + u'Icon location: %windir%\\system32\\migwiz\\migwiz.exe') + + expected_msg_short = ( + u'[@%windir%\\system32\\migwiz\\wet.dll,-590]') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testParseLinkTargetIdentifier(self): + """Tests the Parse function on an LNK with a link target identifier.""" + test_file = self._GetTestFilePath(['NeroInfoTool.lnk']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEqual(len(event_objects), 18) + + # A shortcut event object. + event_object = event_objects[16] + + expected_msg = ( + u'[Nero InfoTool provides you with information about the most ' + u'important features of installed drives, inserted discs, installed ' + u'software and much more. With Nero InfoTool you can find out all ' + u'about your drive and your system configuration.] ' + u'File size: 4635160 ' + u'File attribute flags: 0x00000020 ' + u'Drive type: 3 ' + u'Drive serial number: 0x70ecfa33 ' + u'Volume label: OS ' + u'Local path: C:\\Program Files (x86)\\Nero\\Nero 9\\Nero InfoTool\\' + u'InfoTool.exe ' + u'cmd arguments: -ScParameter=30002 ' + u'Relative path: ..\\..\\..\\..\\..\\..\\..\\..\\Program Files (x86)\\' + u'Nero\\Nero 9\\Nero InfoTool\\InfoTool.exe ' + u'Working dir: C:\\Program Files (x86)\\Nero\\Nero 9\\Nero InfoTool ' + u'Icon location: %ProgramFiles%\\Nero\\Nero 9\\Nero InfoTool\\' + u'InfoTool.exe ' + u'Link target: [My Computer, C:\\, Program Files (x86), Nero, Nero 9, ' + u'Nero InfoTool, InfoTool.exe]') + + expected_msg_short = ( + u'[Nero InfoTool provides you with information about the most ' + u'important feature...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # A shell item event object. + event_object = event_objects[12] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-06-05 20:13:20') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Name: InfoTool.exe ' + u'Long name: InfoTool.exe ' + u'NTFS file reference: 81349-1 ' + u'Origin: NeroInfoTool.lnk') + + expected_msg_short = ( + u'Name: InfoTool.exe ' + u'NTFS file reference: 81349-1 ' + u'Origin: NeroInfoTool.lnk') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winprefetch.py b/plaso/parsers/winprefetch.py new file mode 100644 index 0000000..d5bf21a --- /dev/null +++ b/plaso/parsers/winprefetch.py @@ -0,0 +1,504 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for Windows Prefetch files.""" + +import logging +import os + +import construct + +from plaso.events import time_events +from plaso.events import windows_events +from plaso.lib import binary +from plaso.lib import errors +from plaso.lib import eventdata +from plaso.parsers import interface +from plaso.parsers import manager + + +class WinPrefetchExecutionEvent(time_events.FiletimeEvent): + """Class that defines a Windows Prefetch execution event.""" + + DATA_TYPE = 'windows:prefetch:execution' + + def __init__( + self, timestamp, timestamp_description, file_header, file_information, + mapped_files, path, volume_serial_numbers, volume_device_paths): + """Initializes the event. + + Args: + timestamp: The FILETIME timestamp value. + timestamp_description: The usage string for the timestamp value. + file_header: The file header construct object. + file_information: The file information construct object. + mapped_files: A list of the mapped filenames. + path: A path to the executable. + volume_serial_numbers: A list of volume serial number strings. + volume_device_paths: A list of volume device path strings. + """ + super(WinPrefetchExecutionEvent, self).__init__( + timestamp, timestamp_description) + + self.offset = 0 + + self.version = file_header.get('version', None) + self.executable = binary.Ut16StreamCopyToString( + file_header.get('executable', '')) + self.prefetch_hash = file_header.get('prefetch_hash', None) + + self.run_count = file_information.get('run_count', None) + self.mapped_files = mapped_files + self.path = path + + self.number_of_volumes = file_information.get('number_of_volumes', 0) + self.volume_serial_numbers = volume_serial_numbers + self.volume_device_paths = volume_device_paths + + +class WinPrefetchParser(interface.BaseParser): + """A parser for Windows Prefetch files.""" + + NAME = 'prefetch' + DESCRIPTION = u'Parser for Windows Prefetch files.' 
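+  # Editor's note (illustrative, not part of the original change): the
+  # 4-byte 'SCCA' signature follows the 32-bit format version at the start
+  # of every Prefetch file. The versions handled by this parser roughly map
+  # to: 17 = Windows XP/2003, 23 = Windows Vista/7, 26 = Windows 8/8.1.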
+ + FILE_SIGNATURE = 'SCCA' + + FILE_HEADER_STRUCT = construct.Struct( + 'file_header', + construct.ULInt32('version'), + construct.String('signature', 4), + construct.Padding(4), + construct.ULInt32('file_size'), + construct.String('executable', 60), + construct.ULInt32('prefetch_hash'), + construct.ULInt32('flags')) + + FILE_INFORMATION_V17 = construct.Struct( + 'file_information_v17', + construct.ULInt32('metrics_array_offset'), + construct.ULInt32('number_of_metrics_array_entries'), + construct.ULInt32('trace_chains_array_offset'), + construct.ULInt32('number_of_trace_chains_array_entries'), + construct.ULInt32('filename_strings_offset'), + construct.ULInt32('filename_strings_size'), + construct.ULInt32('volumes_information_offset'), + construct.ULInt32('number_of_volumes'), + construct.ULInt32('volumes_information_size'), + construct.ULInt64('last_run_time'), + construct.Padding(16), + construct.ULInt32('run_count'), + construct.Padding(4)) + + FILE_INFORMATION_V23 = construct.Struct( + 'file_information_v23', + construct.ULInt32('metrics_array_offset'), + construct.ULInt32('number_of_metrics_array_entries'), + construct.ULInt32('trace_chains_array_offset'), + construct.ULInt32('number_of_trace_chains_array_entries'), + construct.ULInt32('filename_strings_offset'), + construct.ULInt32('filename_strings_size'), + construct.ULInt32('volumes_information_offset'), + construct.ULInt32('number_of_volumes'), + construct.ULInt32('volumes_information_size'), + construct.Padding(8), + construct.ULInt64('last_run_time'), + construct.Padding(16), + construct.ULInt32('run_count'), + construct.Padding(84)) + + FILE_INFORMATION_V26 = construct.Struct( + 'file_information_v26', + construct.ULInt32('metrics_array_offset'), + construct.ULInt32('number_of_metrics_array_entries'), + construct.ULInt32('trace_chains_array_offset'), + construct.ULInt32('number_of_trace_chains_array_entries'), + construct.ULInt32('filename_strings_offset'), + construct.ULInt32('filename_strings_size'), + construct.ULInt32('volumes_information_offset'), + construct.ULInt32('number_of_volumes'), + construct.ULInt32('volumes_information_size'), + construct.Padding(8), + construct.ULInt64('last_run_time'), + construct.ULInt64('last_run_time1'), + construct.ULInt64('last_run_time2'), + construct.ULInt64('last_run_time3'), + construct.ULInt64('last_run_time4'), + construct.ULInt64('last_run_time5'), + construct.ULInt64('last_run_time6'), + construct.ULInt64('last_run_time7'), + construct.Padding(16), + construct.ULInt32('run_count'), + construct.Padding(96)) + + METRICS_ARRAY_ENTRY_V17 = construct.Struct( + 'metrics_array_entry_v17', + construct.ULInt32('start_time'), + construct.ULInt32('duration'), + construct.ULInt32('filename_string_offset'), + construct.ULInt32('filename_string_number_of_characters'), + construct.Padding(4)) + + # Note that at the moment for the purpose of this parser + # the v23 and v26 metrics array entry structures are the same. 
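+  # Editor's note (illustrative, not part of the original change): each
+  # metrics array entry points into the filename strings area via
+  # 'filename_string_offset' and, from v23 onward, also carries the NTFS
+  # 'file_reference' of the mapped file; _ParseMetricsArray() below selects
+  # the struct by format version.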
+ METRICS_ARRAY_ENTRY_V23 = construct.Struct( + 'metrics_array_entry_v23', + construct.ULInt32('start_time'), + construct.ULInt32('duration'), + construct.ULInt32('average_duration'), + construct.ULInt32('filename_string_offset'), + construct.ULInt32('filename_string_number_of_characters'), + construct.Padding(4), + construct.ULInt64('file_reference')) + + VOLUME_INFORMATION_V17 = construct.Struct( + 'volume_information_v17', + construct.ULInt32('device_path_offset'), + construct.ULInt32('device_path_number_of_characters'), + construct.ULInt64('creation_time'), + construct.ULInt32('serial_number'), + construct.Padding(8), + construct.ULInt32('directory_strings_offset'), + construct.ULInt32('number_of_directory_strings'), + construct.Padding(4)) + + # Note that at the moment for the purpose of this parser + # the v23 and v26 volume information structures are the same. + VOLUME_INFORMATION_V23 = construct.Struct( + 'volume_information_v23', + construct.ULInt32('device_path_offset'), + construct.ULInt32('device_path_number_of_characters'), + construct.ULInt64('creation_time'), + construct.ULInt32('serial_number'), + construct.Padding(8), + construct.ULInt32('directory_strings_offset'), + construct.ULInt32('number_of_directory_strings'), + construct.Padding(68)) + + def _ParseFileHeader(self, file_object): + """Parses the file header. + + Args: + file_object: A file-like object to read data from. + + Returns: + The file header construct object. + """ + try: + file_header = self.FILE_HEADER_STRUCT.parse_stream(file_object) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile( + u'Unable to parse file header with error: {0:s}'.format(exception)) + + if not file_header: + raise errors.UnableToParseFile(u'Unable to read file header') + + if file_header.get('signature', None) != self.FILE_SIGNATURE: + raise errors.UnableToParseFile(u'Unsupported file signature') + + return file_header + + def _ParseFileInformation(self, file_object, format_version): + """Parses the file information. + + Args: + file_object: A file-like object to read data from. + format_version: The format version. + + Returns: + The file information construct object. + """ + try: + if format_version == 17: + file_information = self.FILE_INFORMATION_V17.parse_stream(file_object) + elif format_version == 23: + file_information = self.FILE_INFORMATION_V23.parse_stream(file_object) + elif format_version == 26: + file_information = self.FILE_INFORMATION_V26.parse_stream(file_object) + else: + file_information = None + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile( + u'Unable to parse v{0:d} file information with error: {1:s}'.format( + format_version, exception)) + + if not file_information: + raise errors.UnableToParseFile( + u'Unable to read v{0:d} file information'.format(format_version)) + return file_information + + def _ParseMetricsArray(self, file_object, format_version, file_information): + """Parses the metrics array. + + Args: + file_object: A file-like object to read data from. + format_version: The format version. + file_information: The file information construct object. + + Returns: + A list of metrics array entry construct objects. 
+ """ + metrics_array = [] + + metrics_array_offset = file_information.get('metrics_array_offset', 0) + number_of_metrics_array_entries = file_information.get( + 'number_of_metrics_array_entries', 0) + + if metrics_array_offset > 0 and number_of_metrics_array_entries > 0: + file_object.seek(metrics_array_offset, os.SEEK_SET) + + for entry_index in range(0, number_of_metrics_array_entries): + try: + if format_version == 17: + metrics_array_entry = self.METRICS_ARRAY_ENTRY_V17.parse_stream( + file_object) + elif format_version in [23, 26]: + metrics_array_entry = self.METRICS_ARRAY_ENTRY_V23.parse_stream( + file_object) + else: + metrics_array_entry = None + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile(( + u'Unable to parse v{0:d} metrics array entry: {1:d} with error: ' + u'{2:s}').format(format_version, entry_index, exception)) + + if not metrics_array_entry: + raise errors.UnableToParseFile( + u'Unable to read v{0:d} metrics array entry: {1:d}'.format( + format_version, entry_index)) + + metrics_array.append(metrics_array_entry) + + return metrics_array + + def _ParseFilenameStrings(self, file_object, file_information): + """Parses the filename strings. + + Args: + file_object: A file-like object to read data from. + file_information: The file information construct object. + + Returns: + A dict of filename strings with their byte offset as the key. + """ + filename_strings_offset = file_information.get('filename_strings_offset', 0) + filename_strings_size = file_information.get('filename_strings_size', 0) + + if filename_strings_offset > 0 and filename_strings_size > 0: + file_object.seek(filename_strings_offset, os.SEEK_SET) + filename_strings_data = file_object.read(filename_strings_size) + filename_strings = binary.ArrayOfUt16StreamCopyToStringTable( + filename_strings_data) + + else: + filename_strings = {} + + return filename_strings + + def _ParseVolumesInformationSection( + self, file_object, format_version, file_information): + """Parses the volumes information section. + + Args: + file_object: A file-like object to read data from. + format_version: The format version. + file_information: The file information construct object. + + Yields: + A volume information construct object. + """ + volumes_information_offset = file_information.get( + 'volumes_information_offset', 0) + + if volumes_information_offset > 0: + number_of_volumes = file_information.get('number_of_volumes', 0) + file_object.seek(volumes_information_offset, os.SEEK_SET) + + while number_of_volumes > 0: + try: + if format_version == 17: + yield self.VOLUME_INFORMATION_V17.parse_stream(file_object) + else: + yield self.VOLUME_INFORMATION_V23.parse_stream(file_object) + except (IOError, construct.FieldError) as exception: + raise errors.UnableToParseFile(( + u'Unable to parse v{0:d} volume information with error: ' + u'{1:s}').format(format_version, exception)) + + number_of_volumes -= 1 + + def _ParseVolumeDevicePath( + self, file_object, file_information, volume_information): + """Parses the volume device path. + + This function expects the current offset of the file-like object to point + as the end of the volume information structure. + + Args: + file_object: A file-like object to read data from. + file_information: The file information construct object. + volume_information: The volume information construct object. + + Returns: + A Unicode string containing the device path or None if not available. 
+ """ + volumes_information_offset = file_information.get( + 'volumes_information_offset', 0) + + device_path = None + if volumes_information_offset > 0: + device_path_offset = volume_information.get('device_path_offset', 0) + device_path_size = 2 * volume_information.get( + 'device_path_number_of_characters', 0) + + if device_path_offset >= 36 and device_path_size > 0: + device_path_offset += volumes_information_offset + current_offset = file_object.tell() + + file_object.seek(device_path_offset, os.SEEK_SET) + device_path = binary.ReadUtf16Stream( + file_object, byte_size=device_path_size) + + file_object.seek(current_offset, os.SEEK_SET) + + return device_path + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extracts events from a Windows Prefetch file. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: A file entry object (instance of dfvfs.FileEntry). + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Raises: + UnableToParseFile: when the file cannot be parsed. + """ + file_object = file_entry.GetFileObject() + file_header = self._ParseFileHeader(file_object) + + format_version = file_header.get('version', None) + if format_version not in [17, 23, 26]: + raise errors.UnableToParseFile( + u'Unsupported format version: {0:d}'.format(format_version)) + + # Add ourselves to the parser chain, which will be used in all subsequent + # event creation in this parser. + parser_chain = self._BuildParserChain(parser_chain) + + file_information = self._ParseFileInformation(file_object, format_version) + metrics_array = self._ParseMetricsArray( + file_object, format_version, file_information) + try: + filename_strings = self._ParseFilenameStrings( + file_object, file_information) + except UnicodeDecodeError as exception: + logging.warning(( + u'[{0:s}] Unable to parse filename information from file {1:s} ' + u'with error: {2:s}').format( + parser_chain, + file_entry.path_spec.comparable.replace(u'\n', u';'), + exception)) + filename_strings = {} + + if len(metrics_array) != len(filename_strings): + logging.debug( + u'Mismatch in number of metrics and filename strings array entries.') + + executable = binary.Ut16StreamCopyToString( + file_header.get('executable', u'')) + + volume_serial_numbers = [] + volume_device_paths = [] + path = u'' + + for volume_information in self._ParseVolumesInformationSection( + file_object, format_version, file_information): + volume_serial_number = volume_information.get('serial_number', 0) + volume_device_path = self._ParseVolumeDevicePath( + file_object, file_information, volume_information) + + volume_serial_numbers.append(volume_serial_number) + volume_device_paths.append(volume_device_path) + + timestamp = volume_information.get('creation_time', 0) + if timestamp: + event_object = windows_events.WindowsVolumeCreationEvent( + timestamp, volume_device_path, volume_serial_number, + file_entry.name) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + for filename in filename_strings.itervalues(): + if not filename: + continue + if (filename.startswith(volume_device_path) and + filename.endswith(executable)): + _, _, path = filename.partition(volume_device_path) + + mapped_files = [] + for metrics_array_entry in metrics_array: + file_reference = metrics_array_entry.get('file_reference', 0) + filename_string_offset = metrics_array_entry.get( + 'filename_string_offset', 0) + + filename = 
filename_strings.get(filename_string_offset, u'') + if not filename: + logging.debug(u'Missing filename string for offset: {0:d}.'.format( + filename_string_offset)) + continue + + if file_reference: + mapped_file_string = ( + u'{0:s} [MFT entry: {1:d}, sequence: {2:d}]').format( + filename, file_reference & 0xffffffffffffL, + file_reference >> 48) + else: + mapped_file_string = filename + + mapped_files.append(mapped_file_string) + + timestamp = file_information.get('last_run_time', 0) + if timestamp: + event_object = WinPrefetchExecutionEvent( + timestamp, eventdata.EventTimestamp.LAST_RUNTIME, file_header, + file_information, mapped_files, path, volume_serial_numbers, + volume_device_paths) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + # Check for the 7 older last run time values available in v26. + if format_version == 26: + for last_run_time_index in range(1, 8): + last_run_time_identifier = 'last_run_time{0:d}'.format( + last_run_time_index) + + timestamp = file_information.get(last_run_time_identifier, 0) + if timestamp: + event_object = WinPrefetchExecutionEvent( + timestamp, + u'Previous {0:s}'.format(eventdata.EventTimestamp.LAST_RUNTIME), + file_header, file_information, mapped_files, path, + volume_serial_numbers, volume_device_paths) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + file_object.close() + + +manager.ParsersManager.RegisterParser(WinPrefetchParser) diff --git a/plaso/parsers/winprefetch_test.py b/plaso/parsers/winprefetch_test.py new file mode 100644 index 0000000..8cd1256 --- /dev/null +++ b/plaso/parsers/winprefetch_test.py @@ -0,0 +1,378 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Windows prefetch parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winprefetch as winprefetch_formatter +from plaso.lib import eventdata +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import winprefetch + + +class WinPrefetchParserTest(test_lib.ParserTestCase): + """Tests for the Windows prefetch parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = winprefetch.WinPrefetchParser() + + def testParse17(self): + """Tests the Parse function on a version 17 Prefetch file.""" + test_file = self._GetTestFilePath(['CMD.EXE-087B4001.pf']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + # The prefetch last run event. 
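+ # Note: the parser emits the volume creation event before the execution
+ # event, which is why index 0 below is the volume creation event and
+ # index 1 the last run event.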
+ event_object = event_objects[1] + + self.assertEquals(event_object.version, 17) + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-03-10 10:11:49.281250') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_RUNTIME) + self.assertEquals(event_object.executable, u'CMD.EXE') + self.assertEquals(event_object.prefetch_hash, 0x087b4001) + self.assertEquals(event_object.volume_serial_numbers[0], 0x24cb074b) + + expected_mapped_files = [ + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\NTDLL.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\KERNEL32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\UNICODE.NLS', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\LOCALE.NLS', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SORTTBLS.NLS', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSVCRT.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\CMD.EXE', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USER32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\GDI32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SHIMENG.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\APPPATCH\\SYSMAIN.SDB', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\APPPATCH\\ACGENRAL.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\ADVAPI32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\RPCRT4.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\WINMM.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\OLE32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\OLEAUT32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\MSACM32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\VERSION.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SHELL32.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SHLWAPI.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\USERENV.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\UXTHEME.DLL', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\CTYPE.NLS', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\SORTKEY.NLS', + (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\WINSXS\\X86_MICROSOFT.WINDOWS.' + u'COMMON-CONTROLS_6595B64144CCF1DF_6.0.2600.2180_X-WW_A84F1FF9\\' + u'COMCTL32.DLL'), + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\WINDOWSSHELL.MANIFEST', + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\SYSTEM32\\COMCTL32.DLL', + (u'\\DEVICE\\HARDDISKVOLUME1\\D50FF1E628137B1A251B47AB9466\\UPDATE\\' + u'UPDATE.EXE.MANIFEST'), + u'\\DEVICE\\HARDDISKVOLUME1\\$MFT', + (u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\IE7\\SPUNINST\\SPUNINST.EXE.' + u'MANIFEST'), + (u'\\DEVICE\\HARDDISKVOLUME1\\D50FF1E628137B1A251B47AB9466\\UPDATE\\' + u'IERESETICONS.EXE'), + u'\\DEVICE\\HARDDISKVOLUME1\\WINDOWS\\IE7\\SPUNINST\\IERESETICONS.EXE'] + + self.assertEquals(event_object.mapped_files, expected_mapped_files) + + # The volume creation event. 
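+ # The volume creation timestamp comes from the creation_time value in
+ # the volume information section of the Prefetch file.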
+ event_object = event_objects[0]
+
+ expected_timestamp = timelib_test.CopyStringToTimestamp(
+ '2013-03-10 10:19:46.234375')
+ self.assertEquals(event_object.timestamp, expected_timestamp)
+ self.assertEquals(
+ event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME)
+
+ expected_msg = (
+ u'\\DEVICE\\HARDDISKVOLUME1 '
+ u'Serial number: 0x24CB074B '
+ u'Origin: CMD.EXE-087B4001.pf')
+
+ expected_msg_short = (
+ u'\\DEVICE\\HARDDISKVOLUME1 '
+ u'Origin: CMD.EXE-087B4001.pf')
+
+ self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+ def testParse23(self):
+ """Tests the Parse function on a version 23 Prefetch file."""
+ test_file = self._GetTestFilePath(['PING.EXE-B29F6629.pf'])
+ event_queue_consumer = self._ParseFile(self._parser, test_file)
+ event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+ self.assertEquals(len(event_objects), 2)
+
+ # The prefetch last run event.
+ event_object = event_objects[1]
+ self.assertEquals(event_object.version, 23)
+
+ expected_timestamp = timelib_test.CopyStringToTimestamp(
+ '2012-04-06 19:00:55.932955')
+ self.assertEquals(event_object.timestamp, expected_timestamp)
+ self.assertEquals(
+ event_object.timestamp_desc, eventdata.EventTimestamp.LAST_RUNTIME)
+
+ self.assertEquals(event_object.executable, u'PING.EXE')
+ self.assertEquals(event_object.prefetch_hash, 0xb29f6629)
+ self.assertEquals(
+ event_object.path, u'\\WINDOWS\\SYSTEM32\\PING.EXE')
+ self.assertEquals(event_object.run_count, 14)
+ self.assertEquals(
+ event_object.volume_device_paths[0], u'\\DEVICE\\HARDDISKVOLUME1')
+ self.assertEquals(event_object.volume_serial_numbers[0], 0xac036525)
+
+ expected_msg = (
+ u'Prefetch [PING.EXE] was executed - run count 14 path: '
+ u'\\WINDOWS\\SYSTEM32\\PING.EXE '
+ u'hash: 0xB29F6629 '
+ u'volume: 1 [serial number: 0xAC036525, '
+ u'device path: \\DEVICE\\HARDDISKVOLUME1]')
+
+ expected_msg_short = u'PING.EXE was run 14 time(s)'
+
+ self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+ # The volume creation event.
+ event_object = event_objects[0]
+
+ expected_timestamp = timelib_test.CopyStringToTimestamp(
+ '2010-11-10 17:37:26.484375')
+ self.assertEquals(event_object.timestamp, expected_timestamp)
+ self.assertEquals(
+ event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME)
+
+ def testParse23MultiVolume(self):
+ """Tests the Parse function on a multi-volume version 23 Prefetch file."""
+ test_file = self._GetTestFilePath(['WUAUCLT.EXE-830BCC14.pf'])
+ event_queue_consumer = self._ParseFile(self._parser, test_file)
+ event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+ self.assertEquals(len(event_objects), 6)
+
+ # The prefetch last run event.
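+ # With five volumes this file yields five volume creation events
+ # (indices 0 to 4) followed by the execution event at index 5.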
+ event_object = event_objects[5] + self.assertEquals(event_object.version, 23) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-15 21:17:39.807996') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_RUNTIME) + + self.assertEquals(event_object.executable, u'WUAUCLT.EXE') + self.assertEquals(event_object.prefetch_hash, 0x830bcc14) + self.assertEquals( + event_object.path, u'\\WINDOWS\\SYSTEM32\\WUAUCLT.EXE') + self.assertEquals(event_object.run_count, 25) + self.assertEquals( + event_object.volume_device_paths[0], u'\\DEVICE\\HARDDISKVOLUME1') + self.assertEquals(event_object.volume_serial_numbers[0], 0xac036525) + + expected_msg = ( + u'Prefetch [WUAUCLT.EXE] was executed - run count 25 path: ' + u'\\WINDOWS\\SYSTEM32\\WUAUCLT.EXE ' + u'hash: 0x830BCC14 ' + u'volume: 1 [serial number: 0xAC036525, ' + u'device path: \\DEVICE\\HARDDISKVOLUME1], ' + u'volume: 2 [serial number: 0xAC036525, ' + u'device path: \\DEVICE\\HARDDISKVOLUMESHADOWCOPY2], ' + u'volume: 3 [serial number: 0xAC036525, ' + u'device path: \\DEVICE\\HARDDISKVOLUMESHADOWCOPY4], ' + u'volume: 4 [serial number: 0xAC036525, ' + u'device path: \\DEVICE\\HARDDISKVOLUMESHADOWCOPY7], ' + u'volume: 5 [serial number: 0xAC036525, ' + u'device path: \\DEVICE\\HARDDISKVOLUMESHADOWCOPY8]') + + expected_msg_short = u'WUAUCLT.EXE was run 25 time(s)' + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # The volume creation event. + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-11-10 17:37:26.484375') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + expected_msg = ( + u'\\DEVICE\\HARDDISKVOLUME1 ' + u'Serial number: 0xAC036525 ' + u'Origin: WUAUCLT.EXE-830BCC14.pf') + + expected_msg_short = ( + u'\\DEVICE\\HARDDISKVOLUME1 ' + u'Origin: WUAUCLT.EXE-830BCC14.pf') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testParse26(self): + """Tests the Parse function on a version 26 Prefetch file.""" + test_file = self._GetTestFilePath(['TASKHOST.EXE-3AE259FC.pf']) + event_queue_consumer = self._ParseFile(self._parser, test_file) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + # The prefetch last run event. + event_object = event_objects[1] + self.assertEquals(event_object.version, 26) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-10-04 15:40:09.037833') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.LAST_RUNTIME) + self.assertEquals(event_object.executable, u'TASKHOST.EXE') + self.assertEquals(event_object.prefetch_hash, 0x3ae259fc) + + # The prefetch previous last run event. 
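+ # Version 26 files store up to seven older run times in addition to the
+ # most recent one; the parser prefixes the timestamp description of the
+ # older values with 'Previous', as asserted below.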
+ event_object = event_objects[2] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-10-04 15:28:09.010356') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, + u'Previous {0:s}'.format(eventdata.EventTimestamp.LAST_RUNTIME)) + + expected_mapped_files = [ + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\NTDLL.DLL ' + u'[MFT entry: 46299, sequence: 1]'), + u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\TASKHOST.EXE', + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\KERNEL32.DLL ' + u'[MFT entry: 45747, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\KERNELBASE.DLL ' + u'[MFT entry: 45734, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\LOCALE.NLS ' + u'[MFT entry: 45777, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\MSVCRT.DLL ' + u'[MFT entry: 46033, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\RPCRT4.DLL ' + u'[MFT entry: 46668, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\COMBASE.DLL ' + u'[MFT entry: 44616, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\OLEAUT32.DLL ' + u'[MFT entry: 46309, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\OLE32.DLL ' + u'[MFT entry: 46348, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\RPCSS.DLL ' + u'[MFT entry: 46654, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\KERNEL.APPCORE.DLL ' + u'[MFT entry: 45698, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\CRYPTBASE.DLL ' + u'[MFT entry: 44560, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\BCRYPTPRIMITIVES.DLL ' + u'[MFT entry: 44355, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\USER32.DLL ' + u'[MFT entry: 47130, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\GDI32.DLL ' + u'[MFT entry: 45344, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\EN-US\\' + u'TASKHOST.EXE.MUI'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\SECHOST.DLL ' + u'[MFT entry: 46699, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\CLBCATQ.DLL ' + u'[MFT entry: 44511, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\RACENGN.DLL ' + u'[MFT entry: 46549, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\NTMARTA.DLL ' + u'[MFT entry: 46262, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\WEVTAPI.DLL ' + u'[MFT entry: 47223, sequence: 1]'), + u'\\DEVICE\\HARDDISKVOLUME2\\$MFT', + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\SQMAPI.DLL ' + u'[MFT entry: 46832, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\AEPIC.DLL ' + u'[MFT entry: 43991, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\WINTRUST.DLL ' + u'[MFT entry: 47372, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\SLWGA.DLL ' + u'[MFT entry: 46762, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\DXGI.DLL ' + u'[MFT entry: 44935, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\ESENT.DLL ' + u'[MFT entry: 45256, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\WMICLNT.DLL ' + u'[MFT entry: 47413, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\ADVAPI32.DLL ' + u'[MFT entry: 43994, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\SFC_OS.DLL ' + u'[MFT entry: 46729, sequence: 1]'), + 
(u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\VERSION.DLL ' + u'[MFT entry: 47120, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\CRYPT32.DLL ' + u'[MFT entry: 44645, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\MSASN1.DLL ' + u'[MFT entry: 45909, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\WTSAPI32.DLL ' + u'[MFT entry: 47527, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\SPPC.DLL ' + u'[MFT entry: 46803, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\POWRPROF.DLL ' + u'[MFT entry: 46413, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\PROFAPI.DLL ' + u'[MFT entry: 46441, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\PROGRAMDATA\\MICROSOFT\\RAC\\STATEDATA\\' + u'RACMETADATA.DAT [MFT entry: 39345, sequence: 2]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\GLOBALIZATION\\SORTING\\' + u'SORTDEFAULT.NLS [MFT entry: 37452, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\RACRULES.XML ' + u'[MFT entry: 46509, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\TASKSCHD.DLL ' + u'[MFT entry: 47043, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\SSPICLI.DLL ' + u'[MFT entry: 46856, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\XMLLITE.DLL ' + u'[MFT entry: 47569, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\PROGRAMDATA\\MICROSOFT\\RAC\\STATEDATA\\' + u'RACWMIEVENTDATA.DAT [MFT entry: 23870, sequence: 3]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\PROGRAMDATA\\MICROSOFT\\RAC\\STATEDATA\\' + u'RACWMIDATABOOKMARKS.DAT [MFT entry: 23871, sequence: 2]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\TPMTASKS.DLL ' + u'[MFT entry: 47003, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\NCRYPT.DLL ' + u'[MFT entry: 46073, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\BCRYPT.DLL ' + u'[MFT entry: 44346, sequence: 1]'), + (u'\\DEVICE\\HARDDISKVOLUME2\\WINDOWS\\SYSTEM32\\NTASN1.DLL ' + u'[MFT entry: 46261, sequence: 1]')] + + self.assertEquals(event_object.mapped_files, expected_mapped_files) + + # The volume creation event. + event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-10-04 15:57:26.146547') + self.assertEquals(event_object.timestamp, expected_timestamp) + self.assertEquals( + event_object.timestamp_desc, eventdata.EventTimestamp.CREATION_TIME) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg.py b/plaso/parsers/winreg.py new file mode 100644 index 0000000..4cb488e --- /dev/null +++ b/plaso/parsers/winreg.py @@ -0,0 +1,336 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Parser for Windows NT Registry (REGF) files.""" + +import logging +import os + +from plaso.lib import errors +from plaso.parsers import interface +from plaso.parsers import manager +from plaso.winreg import cache +from plaso.winreg import winregistry + + +# TODO: add tests for this class. +class PluginList(object): + """A simple class that stores information about Windows Registry plugins.""" + + def __init__(self): + """Initializes the plugin list object.""" + super(PluginList, self).__init__() + self._key_plugins = {} + self._value_plugins = {} + + def __iter__(self): + """Return an iterator of all Windows Registry plugins.""" + ret = [] + _ = map(ret.extend, self._key_plugins.values()) + _ = map(ret.extend, self._value_plugins.values()) + for item in ret: + yield item + + def _GetPluginsByType(self, plugins_dict, plugin_type): + """Retrieves the Windows Registry plugins of a specific type. + + Args: + plugins_dict: Dictionary containing the Windows Registry plugins + by plugin type. + plugin_type: String containing the Windows Registry type, + e.g. NTUSER, SOFTWARE. + + Returns: + A list containing the Windows Registry plugins (instances of + RegistryPlugin) for the specific plugin type. + """ + return plugins_dict.get(plugin_type, []) + plugins_dict.get('any', []) + + def AddPlugin(self, plugin_type, plugin_class): + """Add a Windows Registry plugin to the plugin list. + + Args: + plugin_type: String containing the Windows Registry type, + e.g. NTUSER, SOFTWARE. + plugin_class: The plugin class that is being registered. + """ + # Cannot import the interface here otherwise this will create a cyclic + # dependency. + if hasattr(plugin_class, 'REG_VALUES'): + self._value_plugins.setdefault(plugin_type, []).append(plugin_class) + + else: + self._key_plugins.setdefault(plugin_type, []).append(plugin_class) + + def GetAllKeyPlugins(self): + """Return all key plugins as a list.""" + ret = [] + _ = map(ret.extend, self._key_plugins.values()) + return ret + + def GetAllValuePlugins(self): + """Return a list of a all classes that implement value-based plugins.""" + ret = [] + _ = map(ret.extend, self._value_plugins.values()) + return ret + + def GetExpandedKeyPaths( + self, parser_context, reg_cache=None, plugin_names=None): + """Retrieves a list of expanded Windows Registry key paths. + + Args: + parser_context: A parser context object (instance of ParserContext). + reg_cache: Optional Windows Registry objects cache (instance of + WinRegistryCache). The default is None. + plugin_names: Optional list of plugin names, if defined only keys from + these plugins will be expanded. The default is None which + means all key plugins will get expanded keys. + + Returns: + A list of expanded Windows Registry key paths. + """ + key_paths = [] + for key_plugin_cls in self.GetAllKeyPlugins(): + key_plugin = key_plugin_cls(reg_cache=reg_cache) + + if plugin_names and key_plugin.NAME not in plugin_names: + continue + key_plugin.ExpandKeys(parser_context) + if not key_plugin.expanded_keys: + continue + + for key_path in key_plugin.expanded_keys: + if key_path not in key_paths: + key_paths.append(key_path) + + return key_paths + + def GetKeyPlugins(self, plugin_type): + """Retrieves the Windows Registry key-based plugins of a specific type. + + Args: + plugin_type: String containing the Windows Registry type, + e.g. NTUSER, SOFTWARE. + + Returns: + A list containing the Windows Registry plugins (instances of + RegistryPlugin) for the specific plugin type. 
+ """ + return self._GetPluginsByType(self._key_plugins, plugin_type) + + def GetTypes(self): + """Return a set of all plugins supported.""" + return set(self._key_plugins).union(self._value_plugins) + + def GetValuePlugins(self, plugin_type): + """Retrieves the Windows Registry value-based plugins of a specific type. + + Args: + plugin_type: String containing the Windows Registry type, + e.g. NTUSER, SOFTWARE. + + Returns: + A list containing the Windows Registry plugins (instances of + RegistryPlugin) for the specific plugin type. + """ + return self._GetPluginsByType(self._value_plugins, plugin_type) + + def GetWeights(self): + """Return a set of all weights/priority of the loaded plugins.""" + return set(plugin.WEIGHT for plugin in self.GetAllValuePlugins()).union( + plugin.WEIGHT for plugin in self.GetAllKeyPlugins()) + + def GetWeightPlugins(self, weight, plugin_type=''): + """Return a list of all plugins for a given weight or priority. + + Each plugin defines a weight or a priority that defines in which order + it should be processed in the case of a parser that applies priority. + + This method returns all plugins, whether they are key or value based + that use a defined weight or priority and are defined to parse keys + or values found in a certain Windows Registry type. + + Args: + weight: An integer representing the weight or priority (usually a + number from 1 to 3). + plugin_type: A string that defines the Windows Registry type, eg. NTUSER, + SOFTWARE, etc. + + Returns: + A list that contains all the plugins that fit the defined criteria. + """ + ret = [] + for reg_plugin in self.GetKeyPlugins(plugin_type): + if reg_plugin.WEIGHT == weight: + ret.append(reg_plugin) + + for reg_plugin in self.GetValuePlugins(plugin_type): + if reg_plugin.WEIGHT == weight: + ret.append(reg_plugin) + + return ret + + +class WinRegistryParser(interface.BasePluginsParser): + """Parses Windows NT Registry (REGF) files.""" + + NAME = 'winreg' + DESCRIPTION = u'Parser for Windows NT Registry (REGF) files.' + + _plugin_classes = {} + + # List of types Windows Registry types and required keys to identify each of + # these types. + REG_TYPES = { + 'NTUSER': ('\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer',), + 'SOFTWARE': ('\\Microsoft\\Windows\\CurrentVersion\\App Paths',), + 'SECURITY': ('\\Policy\\PolAdtEv',), + 'SYSTEM': ('\\Select',), + 'SAM': ('\\SAM\\Domains\\Account\\Users',), + 'UNKNOWN': (), + } + + def __init__(self): + """Initializes a parser object.""" + super(WinRegistryParser, self).__init__() + self._plugins = WinRegistryParser.GetPluginList() + + def _RecurseKey(self, key): + """A generator that takes a key and yields every subkey of it.""" + # In the case of a Registry file not having a root key we will not be able + # to traverse the Registry, in which case we need to return here. + if not key: + return + + yield key + + for subkey in key.GetSubkeys(): + for recursed_key in self._RecurseKey(subkey): + yield recursed_key + + @classmethod + def GetPluginList(cls): + """Build a list of all available plugins. + + Returns: + A plugins list (instance of PluginList). + """ + plugins_list = PluginList() + for _, plugin_class in cls.GetPlugins(): + plugins_list.AddPlugin(plugin_class.REG_TYPE, plugin_class) + return plugins_list + + def Parse(self, parser_context, file_entry, parser_chain=None): + """Extract data from a Windows Registry file. + + Args: + parser_context: A parser context object (instance of ParserContext). 
+ file_entry: A file entry object (instance of dfvfs.FileEntry).
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ """
+ # TODO: Remove these magic reads when the classifier has been
+ # implemented; until then we need to make sure we are dealing with
+ # a Windows NT Registry file before proceeding.
+ magic = 'regf'
+
+ file_object = file_entry.GetFileObject()
+ file_object.seek(0, os.SEEK_SET)
+ data = file_object.read(len(magic))
+ file_object.close()
+
+ if data != magic:
+ raise errors.UnableToParseFile((
+ u'[{0:s}] unable to parse file: {1:s} with error: invalid '
+ u'signature.').format(self.NAME, file_entry.name))
+
+ registry = winregistry.WinRegistry(
+ winregistry.WinRegistry.BACKEND_PYREGF)
+
+ # Determine type, find all parsers
+ try:
+ winreg_file = registry.OpenFile(
+ file_entry, codepage=parser_context.codepage)
+ except IOError as exception:
+ raise errors.UnableToParseFile(
+ u'[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format(
+ self.NAME, file_entry.name, exception))
+
+ # Detect the Windows Registry file type.
+ registry_type = 'UNKNOWN'
+ for reg_type in self.REG_TYPES:
+ if reg_type == 'UNKNOWN':
+ continue
+
+ # Check if all the known keys for a certain Registry file exist.
+ known_keys_found = True
+ for known_key_path in self.REG_TYPES[reg_type]:
+ if not winreg_file.GetKeyByPath(known_key_path):
+ known_keys_found = False
+ break
+
+ if known_keys_found:
+ registry_type = reg_type
+ break
+
+ self._registry_type = registry_type
+ logging.debug(
+ u'Windows Registry file {0:s}: detected as: {1:s}'.format(
+ file_entry.name, registry_type))
+
+ registry_cache = cache.WinRegistryCache()
+ registry_cache.BuildCache(winreg_file, registry_type)
+
+ plugins = {}
+ number_of_plugins = 0
+ for weight in self._plugins.GetWeights():
+ plist = self._plugins.GetWeightPlugins(weight, registry_type)
+ plugins[weight] = []
+ for plugin in plist:
+ plugins[weight].append(plugin(reg_cache=registry_cache))
+ number_of_plugins += 1
+
+ logging.debug(
+ u'Number of plugins for this Windows Registry file: {0:d}.'.format(
+ number_of_plugins))
+
+ # Recurse through keys in the file and apply the plugins in the order:
+ # 1. file type specific key-based plugins.
+ # 2. generic key-based plugins.
+ # 3. file type specific value-based plugins.
+ # 4. generic value-based plugins.
+ root_key = winreg_file.GetKeyByPath(u'\\')
+
+ parser_chain = self._BuildParserChain(parser_chain)
+
+ for key in self._RecurseKey(root_key):
+ for weight in plugins.iterkeys():
+ # TODO: determine if the plugin matches the key and continue
+ # to the next key.
+ for plugin in plugins[weight]:
+ if parser_context.abort:
+ break
+
+ plugin.Process(
+ parser_context, file_entry=file_entry, key=key,
+ registry_type=self._registry_type,
+ codepage=parser_context.codepage, parser_chain=parser_chain)
+
+ winreg_file.Close()
+
+
+manager.ParsersManager.RegisterParser(WinRegistryParser)
diff --git a/plaso/parsers/winreg_plugins/__init__.py b/plaso/parsers/winreg_plugins/__init__.py
new file mode 100644
index 0000000..9464d3a
--- /dev/null
+++ b/plaso/parsers/winreg_plugins/__init__.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the import statements for the Registry plugins.""" + +from plaso.parsers.winreg_plugins import appcompatcache +from plaso.parsers.winreg_plugins import bagmru +from plaso.parsers.winreg_plugins import ccleaner +from plaso.parsers.winreg_plugins import default +from plaso.parsers.winreg_plugins import lfu +from plaso.parsers.winreg_plugins import mountpoints +from plaso.parsers.winreg_plugins import mrulist +from plaso.parsers.winreg_plugins import mrulistex +from plaso.parsers.winreg_plugins import msie_zones +from plaso.parsers.winreg_plugins import officemru +from plaso.parsers.winreg_plugins import outlook +from plaso.parsers.winreg_plugins import run +from plaso.parsers.winreg_plugins import sam_users +from plaso.parsers.winreg_plugins import services +from plaso.parsers.winreg_plugins import shutdown +from plaso.parsers.winreg_plugins import task_scheduler +from plaso.parsers.winreg_plugins import terminal_server +from plaso.parsers.winreg_plugins import typedurls +from plaso.parsers.winreg_plugins import userassist +from plaso.parsers.winreg_plugins import usb +from plaso.parsers.winreg_plugins import usbstor +from plaso.parsers.winreg_plugins import winrar +from plaso.parsers.winreg_plugins import winver diff --git a/plaso/parsers/winreg_plugins/appcompatcache.py b/plaso/parsers/winreg_plugins/appcompatcache.py new file mode 100644 index 0000000..49b9566 --- /dev/null +++ b/plaso/parsers/winreg_plugins/appcompatcache.py @@ -0,0 +1,624 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Windows Registry plugin to parse the Application Compatibility Cache key.""" + +import construct +import logging + +from plaso.events import time_events +from plaso.lib import binary +from plaso.lib import eventdata +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class AppCompatCacheEvent(time_events.FiletimeEvent): + """Class that contains the event object for AppCompatCache entries.""" + + DATA_TYPE = 'windows:registry:appcompatcache' + + def __init__( + self, filetime, usage, key, entry_index, path, offset): + """Initializes a Windows Registry event. + + Args: + filetime: The FILETIME timestamp value. + usage: The description of the usage of the time value. + key: Name of the Registry key being parsed. + entry_index: The cache entry index number for the record. + path: The full path to the executable. + offset: The (data) offset of the Registry key or value. 
+ """ + super(AppCompatCacheEvent, self).__init__(filetime, usage) + + self.keyname = key + self.offset = offset + self.entry_index = entry_index + self.path = path + + +class AppCompatCacheHeader(object): + """Class that contains the Application Compatibility Cache header.""" + + def __init__(self): + """Initializes the header object.""" + super(AppCompatCacheHeader, self).__init__() + self.number_of_cached_entries = 0 + self.header_size = 0 + + +class AppCompatCacheCachedEntry(object): + """Class that contains the Application Compatibility Cache cached entry.""" + + def __init__(self): + """Initializes the cached entry object.""" + super(AppCompatCacheCachedEntry, self).__init__() + self.cached_entry_size = 0 + self.data = None + self.file_size = None + self.insertion_flags = None + self.last_modification_time = None + self.last_update_time = None + self.shim_flags = None + self.path = None + + +class AppCompatCachePlugin(interface.KeyPlugin): + """Class that parses the Application Compatibility Cache Registry data.""" + + NAME = 'winreg_appcompatcache' + DESCRIPTION = u'Parser for Application Compatibility Cache Registry data.' + + REG_KEYS = [ + u'\\{current_control_set}\\Control\\Session Manager\\AppCompatibility', + u'\\{current_control_set}\\Control\\Session Manager\\AppCompatCache'] + REG_TYPE = 'SYSTEM' + URL = [ + (u'https://code.google.com/p/winreg-kb/wiki/' + u'ApplicationCompatibilityCacheKey')] + + _FORMAT_TYPE_2000 = 1 + _FORMAT_TYPE_XP = 2 + _FORMAT_TYPE_2003 = 3 + _FORMAT_TYPE_VISTA = 4 + _FORMAT_TYPE_7 = 5 + _FORMAT_TYPE_8 = 6 + + # AppCompatCache format signature used in Windows XP. + _HEADER_SIGNATURE_XP = 0xdeadbeef + + # AppCompatCache format used in Windows XP. + _HEADER_XP_32BIT_STRUCT = construct.Struct( + 'appcompatcache_header_xp', + construct.ULInt32('signature'), + construct.ULInt32('number_of_cached_entries'), + construct.ULInt32('unknown1'), + construct.ULInt32('unknown2'), + construct.Padding(384)) + + _CACHED_ENTRY_XP_32BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_xp_32bit', + construct.Array(528, construct.Byte('path')), + construct.ULInt64('last_modification_time'), + construct.ULInt64('file_size'), + construct.ULInt64('last_update_time')) + + # AppCompatCache format signature used in Windows 2003, Vista and 2008. + _HEADER_SIGNATURE_2003 = 0xbadc0ffe + + # AppCompatCache format used in Windows 2003. + _HEADER_2003_STRUCT = construct.Struct( + 'appcompatcache_header_2003', + construct.ULInt32('signature'), + construct.ULInt32('number_of_cached_entries')) + + _CACHED_ENTRY_2003_32BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_2003_32bit', + construct.ULInt16('path_size'), + construct.ULInt16('maximum_path_size'), + construct.ULInt32('path_offset'), + construct.ULInt64('last_modification_time'), + construct.ULInt64('file_size')) + + _CACHED_ENTRY_2003_64BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_2003_64bit', + construct.ULInt16('path_size'), + construct.ULInt16('maximum_path_size'), + construct.ULInt32('unknown1'), + construct.ULInt64('path_offset'), + construct.ULInt64('last_modification_time'), + construct.ULInt64('file_size')) + + # AppCompatCache format used in Windows Vista and 2008. 
+ _CACHED_ENTRY_VISTA_32BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_vista_32bit', + construct.ULInt16('path_size'), + construct.ULInt16('maximum_path_size'), + construct.ULInt32('path_offset'), + construct.ULInt64('last_modification_time'), + construct.ULInt32('insertion_flags'), + construct.ULInt32('shim_flags')) + + _CACHED_ENTRY_VISTA_64BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_vista_64bit', + construct.ULInt16('path_size'), + construct.ULInt16('maximum_path_size'), + construct.ULInt32('unknown1'), + construct.ULInt64('path_offset'), + construct.ULInt64('last_modification_time'), + construct.ULInt32('insertion_flags'), + construct.ULInt32('shim_flags')) + + # AppCompatCache format signature used in Windows 7 and 2008 R2. + _HEADER_SIGNATURE_7 = 0xbadc0fee + + # AppCompatCache format used in Windows 7 and 2008 R2. + _HEADER_7_STRUCT = construct.Struct( + 'appcompatcache_header_7', + construct.ULInt32('signature'), + construct.ULInt32('number_of_cached_entries'), + construct.Padding(120)) + + _CACHED_ENTRY_7_32BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_7_32bit', + construct.ULInt16('path_size'), + construct.ULInt16('maximum_path_size'), + construct.ULInt32('path_offset'), + construct.ULInt64('last_modification_time'), + construct.ULInt32('insertion_flags'), + construct.ULInt32('shim_flags'), + construct.ULInt32('data_size'), + construct.ULInt32('data_offset')) + + _CACHED_ENTRY_7_64BIT_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_7_64bit', + construct.ULInt16('path_size'), + construct.ULInt16('maximum_path_size'), + construct.ULInt32('unknown1'), + construct.ULInt64('path_offset'), + construct.ULInt64('last_modification_time'), + construct.ULInt32('insertion_flags'), + construct.ULInt32('shim_flags'), + construct.ULInt64('data_size'), + construct.ULInt64('data_offset')) + + # AppCompatCache format used in Windows 8.0 and 8.1. + _HEADER_SIGNATURE_8 = 0x00000080 + + _HEADER_8_STRUCT = construct.Struct( + 'appcompatcache_header_8', + construct.ULInt32('signature'), + construct.Padding(124)) + + _CACHED_ENTRY_HEADER_8_STRUCT = construct.Struct( + 'appcompatcache_cached_entry_header_8', + construct.ULInt32('signature'), + construct.ULInt32('unknown1'), + construct.ULInt32('cached_entry_data_size'), + construct.ULInt16('path_size')) + + # AppCompatCache format used in Windows 8.0. + _CACHED_ENTRY_SIGNATURE_8_0 = '00ts' + + # AppCompatCache format used in Windows 8.1. + _CACHED_ENTRY_SIGNATURE_8_1 = '10ts' + + def _CheckSignature(self, value_data): + """Parses and validates the signature. + + Args: + value_data: a binary string containing the value data. + + Returns: + The format type if successful or None otherwise. + """ + signature = construct.ULInt32('signature').parse(value_data) + if signature == self._HEADER_SIGNATURE_XP: + return self._FORMAT_TYPE_XP + + elif signature == self._HEADER_SIGNATURE_2003: + # TODO: determine which format version is used (2003 or Vista). + return self._FORMAT_TYPE_2003 + + elif signature == self._HEADER_SIGNATURE_7: + return self._FORMAT_TYPE_7 + + elif signature == self._HEADER_SIGNATURE_8: + if value_data[signature:signature + 4] in [ + self._CACHED_ENTRY_SIGNATURE_8_0, self._CACHED_ENTRY_SIGNATURE_8_1]: + return self._FORMAT_TYPE_8 + + def _DetermineCacheEntrySize( + self, format_type, value_data, cached_entry_offset): + """Determines the size of a cached entry. + + Args: + format_type: integer value that contains the format type. + value_data: a binary string containing the value data. 
+ cached_entry_offset: integer value that contains the offset of
+ the first cached entry data relative to the start of
+ the value data.
+
+ Returns:
+ The cached entry size if successful or None otherwise.
+
+ Raises:
+ RuntimeError: if the format type is not supported.
+ """
+ if format_type not in [
+ self._FORMAT_TYPE_XP, self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA,
+ self._FORMAT_TYPE_7, self._FORMAT_TYPE_8]:
+ raise RuntimeError(
+ u'[{0:s}] Unsupported format type: {1:d}'.format(
+ self.NAME, format_type))
+
+ cached_entry_data = value_data[cached_entry_offset:]
+ cached_entry_size = 0
+
+ if format_type == self._FORMAT_TYPE_XP:
+ cached_entry_size = self._CACHED_ENTRY_XP_32BIT_STRUCT.sizeof()
+
+ elif format_type in [
+ self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA, self._FORMAT_TYPE_7]:
+ path_size = construct.ULInt16('path_size').parse(cached_entry_data[0:2])
+ maximum_path_size = construct.ULInt16('maximum_path_size').parse(
+ cached_entry_data[2:4])
+ path_offset_32bit = construct.ULInt32('path_offset').parse(
+ cached_entry_data[4:8])
+ path_offset_64bit = construct.ULInt64('path_offset').parse(
+ cached_entry_data[8:16])
+
+ if maximum_path_size < path_size:
+ logging.error(
+ u'[{0:s}] Path size value out of bounds.'.format(self.NAME))
+ return
+
+ path_end_of_string_size = maximum_path_size - path_size
+ if path_size == 0 or path_end_of_string_size != 2:
+ logging.error(
+ u'[{0:s}] Unsupported path size values.'.format(self.NAME))
+ return
+
+ # Assume the entry is 64-bit if the 32-bit path offset is 0 and
+ # the 64-bit path offset is set.
+ if path_offset_32bit == 0 and path_offset_64bit != 0:
+ if format_type == self._FORMAT_TYPE_2003:
+ cached_entry_size = self._CACHED_ENTRY_2003_64BIT_STRUCT.sizeof()
+ elif format_type == self._FORMAT_TYPE_VISTA:
+ cached_entry_size = self._CACHED_ENTRY_VISTA_64BIT_STRUCT.sizeof()
+ elif format_type == self._FORMAT_TYPE_7:
+ cached_entry_size = self._CACHED_ENTRY_7_64BIT_STRUCT.sizeof()
+
+ else:
+ if format_type == self._FORMAT_TYPE_2003:
+ cached_entry_size = self._CACHED_ENTRY_2003_32BIT_STRUCT.sizeof()
+ elif format_type == self._FORMAT_TYPE_VISTA:
+ cached_entry_size = self._CACHED_ENTRY_VISTA_32BIT_STRUCT.sizeof()
+ elif format_type == self._FORMAT_TYPE_7:
+ cached_entry_size = self._CACHED_ENTRY_7_32BIT_STRUCT.sizeof()
+
+ elif format_type == self._FORMAT_TYPE_8:
+ cached_entry_size = self._CACHED_ENTRY_HEADER_8_STRUCT.sizeof()
+
+ return cached_entry_size
+
+ def _ParseHeader(self, format_type, value_data):
+ """Parses the header.
+
+ Args:
+ format_type: integer value that contains the format type.
+ value_data: a binary string containing the value data.
+
+ Returns:
+ A header object (instance of AppCompatCacheHeader).
+
+ Raises:
+ RuntimeError: if the format type is not supported.
+ """
+ if format_type not in [
+ self._FORMAT_TYPE_XP, self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA,
+ self._FORMAT_TYPE_7, self._FORMAT_TYPE_8]:
+ raise RuntimeError(
+ u'[{0:s}] Unsupported format type: {1:d}'.format(
+ self.NAME, format_type))
+
+ # TODO: change to collections.namedtuple or use __slots__ if the overhead
+ # of a regular object becomes a problem.
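+ # A minimal sketch of the namedtuple alternative mentioned in the TODO
+ # above (not used here):
+ # AppCompatCacheHeader = collections.namedtuple(
+ # 'AppCompatCacheHeader',
+ # ['header_size', 'number_of_cached_entries'])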
+ header_object = AppCompatCacheHeader() + + if format_type == self._FORMAT_TYPE_XP: + header_object.header_size = self._HEADER_XP_32BIT_STRUCT.sizeof() + header_struct = self._HEADER_XP_32BIT_STRUCT.parse(value_data) + + elif format_type == self._FORMAT_TYPE_2003: + header_object.header_size = self._HEADER_2003_STRUCT.sizeof() + header_struct = self._HEADER_2003_STRUCT.parse(value_data) + + elif format_type == self._FORMAT_TYPE_VISTA: + header_object.header_size = self._HEADER_VISTA_STRUCT.sizeof() + header_struct = self._HEADER_VISTA_STRUCT.parse(value_data) + + elif format_type == self._FORMAT_TYPE_7: + header_object.header_size = self._HEADER_7_STRUCT.sizeof() + header_struct = self._HEADER_7_STRUCT.parse(value_data) + + elif format_type == self._FORMAT_TYPE_8: + header_object.header_size = self._HEADER_8_STRUCT.sizeof() + header_struct = self._HEADER_8_STRUCT.parse(value_data) + + if format_type in [ + self._FORMAT_TYPE_XP, self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA, + self._FORMAT_TYPE_7]: + header_object.number_of_cached_entries = header_struct.get( + 'number_of_cached_entries') + + return header_object + + def _ParseCachedEntry( + self, format_type, value_data, cached_entry_offset, cached_entry_size): + """Parses a cached entry. + + Args: + format_type: integer value that contains the format type. + value_data: a binary string containing the value data. + cached_entry_offset: integer value that contains the offset of + the cached entry data relative to the start of + the value data. + cached_entry_size: integer value that contains the cached entry data size. + + Returns: + A cached entry object (instance of AppCompatCacheCachedEntry). + + Raises: + RuntimeError: if the format type is not supported. + """ + if format_type not in [ + self._FORMAT_TYPE_XP, self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA, + self._FORMAT_TYPE_7, self._FORMAT_TYPE_8]: + raise RuntimeError( + u'[{0:s}] Unsupported format type: {1:d}'.format( + self.NAME, format_type)) + + cached_entry_data = value_data[ + cached_entry_offset:cached_entry_offset + cached_entry_size] + + cached_entry_struct = None + + if format_type == self._FORMAT_TYPE_XP: + if cached_entry_size == self._CACHED_ENTRY_XP_32BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_XP_32BIT_STRUCT.parse( + cached_entry_data) + + elif format_type == self._FORMAT_TYPE_2003: + if cached_entry_size == self._CACHED_ENTRY_2003_32BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_2003_32BIT_STRUCT.parse( + cached_entry_data) + + elif cached_entry_size == self._CACHED_ENTRY_2003_64BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_2003_64BIT_STRUCT.parse( + cached_entry_data) + + elif format_type == self._FORMAT_TYPE_VISTA: + if cached_entry_size == self._CACHED_ENTRY_VISTA_32BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_VISTA_32BIT_STRUCT.parse( + cached_entry_data) + + elif cached_entry_size == self._CACHED_ENTRY_VISTA_64BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_VISTA_64BIT_STRUCT.parse( + cached_entry_data) + + elif format_type == self._FORMAT_TYPE_7: + if cached_entry_size == self._CACHED_ENTRY_7_32BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_7_32BIT_STRUCT.parse( + cached_entry_data) + + elif cached_entry_size == self._CACHED_ENTRY_7_64BIT_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_7_64BIT_STRUCT.parse( + cached_entry_data) + + elif format_type == self._FORMAT_TYPE_8: + if cached_entry_data[0:4] not in [ + 
self._CACHED_ENTRY_SIGNATURE_8_0, self._CACHED_ENTRY_SIGNATURE_8_1]: + raise RuntimeError( + u'[{0:s}] Unsupported cache entry signature'.format(self.NAME)) + + if cached_entry_size == self._CACHED_ENTRY_HEADER_8_STRUCT.sizeof(): + cached_entry_struct = self._CACHED_ENTRY_HEADER_8_STRUCT.parse( + cached_entry_data) + + cached_entry_data_size = cached_entry_struct.get( + 'cached_entry_data_size') + cached_entry_size = 12 + cached_entry_data_size + + cached_entry_data = value_data[ + cached_entry_offset:cached_entry_offset + cached_entry_size] + + if not cached_entry_struct: + raise RuntimeError( + u'[{0:s}] Unsupported cache entry size: {1:d}'.format( + self.NAME, cached_entry_size)) + + cached_entry_object = AppCompatCacheCachedEntry() + cached_entry_object.cached_entry_size = cached_entry_size + + path_offset = 0 + data_size = 0 + + if format_type == self._FORMAT_TYPE_XP: + string_size = 0 + for string_index in xrange(0, 528, 2): + if (ord(cached_entry_data[string_index]) == 0 and + ord(cached_entry_data[string_index + 1]) == 0): + break + string_size += 2 + + cached_entry_object.path = binary.Ut16StreamCopyToString( + cached_entry_data[0:string_size]) + + elif format_type in [ + self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA, self._FORMAT_TYPE_7]: + path_size = cached_entry_struct.get('path_size') + path_offset = cached_entry_struct.get('path_offset') + + elif format_type == self._FORMAT_TYPE_8: + path_size = cached_entry_struct.get('path_size') + + cached_entry_data_offset = 14 + path_size + cached_entry_object.path = binary.Ut16StreamCopyToString( + cached_entry_data[14:cached_entry_data_offset]) + + remaining_data = cached_entry_data[cached_entry_data_offset:] + + cached_entry_object.insertion_flags = construct.ULInt32( + 'insertion_flags').parse(remaining_data[0:4]) + cached_entry_object.shim_flags = construct.ULInt32( + 'shim_flags').parse(remaining_data[4:8]) + + if cached_entry_data[0:4] == self._CACHED_ENTRY_SIGNATURE_8_0: + cached_entry_data_offset += 8 + + elif cached_entry_data[0:4] == self._CACHED_ENTRY_SIGNATURE_8_1: + cached_entry_data_offset += 10 + + remaining_data = cached_entry_data[cached_entry_data_offset:] + + if format_type in [ + self._FORMAT_TYPE_XP, self._FORMAT_TYPE_2003, self._FORMAT_TYPE_VISTA, + self._FORMAT_TYPE_7]: + cached_entry_object.last_modification_time = cached_entry_struct.get( + 'last_modification_time') + + elif format_type == self._FORMAT_TYPE_8: + cached_entry_object.last_modification_time = construct.ULInt64( + 'last_modification_time').parse(remaining_data[0:8]) + + if format_type in [self._FORMAT_TYPE_XP, self._FORMAT_TYPE_2003]: + cached_entry_object.file_size = cached_entry_struct.get('file_size') + + elif format_type in [self._FORMAT_TYPE_VISTA, self._FORMAT_TYPE_7]: + cached_entry_object.insertion_flags = cached_entry_struct.get( + 'insertion_flags') + cached_entry_object.shim_flags = cached_entry_struct.get('shim_flags') + + if format_type == self._FORMAT_TYPE_XP: + cached_entry_object.last_update_time = cached_entry_struct.get( + 'last_update_time') + + if format_type == self._FORMAT_TYPE_7: + data_offset = cached_entry_struct.get('data_offset') + data_size = cached_entry_struct.get('data_size') + + elif format_type == self._FORMAT_TYPE_8: + data_offset = cached_entry_offset + cached_entry_data_offset + 12 + data_size = construct.ULInt32('data_size').parse(remaining_data[8:12]) + + if path_offset > 0 and path_size > 0: + path_size += path_offset + + cached_entry_object.path = binary.Ut16StreamCopyToString( + 
value_data[path_offset:path_size])
+
+ if data_size > 0:
+ data_size += data_offset
+
+ cached_entry_object.data = value_data[data_offset:data_size]
+
+ return cached_entry_object
+
+ def GetEntries(self, parser_context, key=None, file_entry=None,
+ parser_chain=None, **unused_kwargs):
+ """Extracts event objects from an Application Compatibility Cache key.
+
+ Args:
+ parser_context: A parser context object (instance of ParserContext).
+ key: Optional Registry key (instance of winreg.WinRegKey).
+ The default is None.
+ parser_chain: Optional string containing the parsing chain up to this
+ point. The default is None.
+ file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+ The default is None.
+ """
+ value = key.GetValue('AppCompatCache')
+ if not value:
+ return
+
+ value_data = value.data
+ value_data_size = len(value.data)
+
+ format_type = self._CheckSignature(value_data)
+ if not format_type:
+ # TODO: Instead of logging emit a parser error object once that
+ # mechanism is implemented.
+ logging.error(
+ u'AppCompatCache format error: [{0:s}] Unsupported signature'.format(
+ key.path))
+ return
+
+ header_object = self._ParseHeader(format_type, value_data)
+
+ # On Windows Vista and 2008 when the cache is empty it will
+ # only consist of the header.
+ if value_data_size <= header_object.header_size:
+ return
+
+ cached_entry_offset = header_object.header_size
+ cached_entry_size = self._DetermineCacheEntrySize(
+ format_type, value_data, cached_entry_offset)
+
+ if not cached_entry_size:
+ # TODO: Instead of logging emit a parser error object once that
+ # mechanism is implemented.
+ logging.error(
+ u'AppCompatCache format error: [{0:s}] Unsupported cached entry '
+ u'size.'.format(key.path))
+ return
+
+ cached_entry_index = 0
+ while cached_entry_offset < value_data_size:
+ cached_entry_object = self._ParseCachedEntry(
+ format_type, value_data, cached_entry_offset, cached_entry_size)
+
+ if cached_entry_object.last_modification_time is not None:
+ # TODO: refactor to file modification event.
+ event_object = AppCompatCacheEvent(
+ cached_entry_object.last_modification_time,
+ u'File Last Modification Time', key.path,
+ cached_entry_index + 1, cached_entry_object.path,
+ cached_entry_offset)
+ parser_context.ProduceEvent(
+ event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+ if cached_entry_object.last_update_time is not None:
+ # TODO: refactor to process run event.
+ event_object = AppCompatCacheEvent(
+ cached_entry_object.last_update_time,
+ eventdata.EventTimestamp.LAST_RUNTIME, key.path,
+ cached_entry_index + 1, cached_entry_object.path,
+ cached_entry_offset)
+ parser_context.ProduceEvent(
+ event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+ cached_entry_offset += cached_entry_object.cached_entry_size
+ cached_entry_index += 1
+
+ if (header_object.number_of_cached_entries != 0 and
+ cached_entry_index >= header_object.number_of_cached_entries):
+ break
+
+
+winreg.WinRegistryParser.RegisterPlugin(AppCompatCachePlugin)
diff --git a/plaso/parsers/winreg_plugins/appcompatcache_test.py b/plaso/parsers/winreg_plugins/appcompatcache_test.py
new file mode 100644
index 0000000..84dca55
--- /dev/null
+++ b/plaso/parsers/winreg_plugins/appcompatcache_test.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Application Compatibility Cache key Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import appcompatcache +from plaso.parsers.winreg_plugins import test_lib + + +class AppCompatCacheRegistryPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the AppCompatCache Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = appcompatcache.AppCompatCachePlugin() + + def testProcess(self): + """Tests the Process function.""" + knowledge_base_values = {'current_control_set': u'ControlSet001'} + test_file_entry = self._GetTestFileEntryFromPath(['SYSTEM']) + key_path = u'\\ControlSet001\\Control\\Session Manager\\AppCompatCache' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, + knowledge_base_values=knowledge_base_values, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 330) + + event_object = event_objects[9] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-04 01:46:37.932964') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.keyname, key_path) + expected_msg = ( + u'[{0:s}] Cached entry: 10 Path: ' + u'\\??\\C:\\Windows\\PSEXESVC.EXE'.format(event_object.keyname)) + + expected_msg_short = ( + u'Path: \\??\\C:\\Windows\\PSEXESVC.EXE') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/bagmru.py b/plaso/parsers/winreg_plugins/bagmru.py new file mode 100644 index 0000000..5e65b2e --- /dev/null +++ b/plaso/parsers/winreg_plugins/bagmru.py @@ -0,0 +1,206 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains BagMRU Windows Registry plugins (shellbags).""" + +import logging + +import construct + +from plaso.events import windows_events +from plaso.parsers.shared import shell_items +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class BagMRUPlugin(interface.KeyPlugin): + """Class that defines a BagMRU Windows Registry plugin.""" + + NAME = 'winreg_bagmru' + DESCRIPTION = u'Parser for BagMRU Registry data.' + + # TODO: remove REG_TYPE and use HKEY_CURRENT_USER instead. + REG_TYPE = 'any' + + REG_KEYS = frozenset([ + u'\\Software\\Microsoft\\Windows\\Shell\\BagMRU', + u'\\Software\\Microsoft\\Windows\\ShellNoRoam\\BagMRU', + (u'\\Local Settings\\Software\\Microsoft\\Windows\\' + u'Shell\\BagMRU'), + (u'\\Local Settings\\Software\\Microsoft\\Windows\\' + u'ShellNoRoam\\BagMRU'), + (u'\\Wow6432Node\\Local Settings\\Software\\' + u'Microsoft\\Windows\\Shell\\BagMRU'), + (u'\\Wow6432Node\\Local Settings\\Software\\' + u'Microsoft\\Windows\\ShellNoRoam\\BagMRU')]) + + URLS = [u'https://code.google.com/p/winreg-kb/wiki/MRUKeys'] + + _MRULISTEX_STRUCT = construct.Range(1, 500, construct.ULInt32('entry_number')) + + def _ParseMRUListExEntryValue( + self, parser_context, key, entry_index, entry_number, text_dict, + value_strings, parent_value_string, codepage='cp1252', file_entry=None, + parser_chain=None, **unused_kwargs): + """Parses the MRUListEx entry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUListEx value. + entry_index: integer value representing the MRUListEx entry index. + entry_number: integer value representing the entry number. + text_dict: text dictionary object to append textual strings. + value_strings: value string dictionary object to append value strings. + parent_value_string: string containing the parent value string. + codepage: Optional extended ASCII string codepage. The default is cp1252. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + value = key.GetValue(u'{0:d}'.format(entry_number)) + value_string = u'' + if value is None: + logging.debug( + u'[{0:s}] Missing MRUListEx entry value: {1:d} in key: {2:s}.'.format( + self.name, entry_number, key.path)) + + elif not value.DataIsBinaryData(): + logging.debug(( + u'[{0:s}] Non-binary MRUListEx entry value: {1:d} in key: ' + u'{2:s}.').format(self.name, entry_number, key.path)) + + elif value.data: + shell_items_parser = shell_items.ShellItemsParser(key.path) + shell_items_parser.Parse( + parser_context, value.data, codepage=codepage, file_entry=file_entry, + parser_chain=parser_chain) + + value_string = shell_items_parser.CopyToPath() + if parent_value_string: + value_string = u', '.join([parent_value_string, value_string]) + + value_strings[entry_number] = value_string + + value_string = u'Shell item list: [{0:s}]'.format(value_string) + + value_text = u'Index: {0:d} [MRU Value {1:d}]'.format( + entry_index + 1, entry_number) + + text_dict[value_text] = value_string + + def _ParseMRUListExValue(self, key): + """Parsed the MRUListEx value in a given Registry key. + + Args: + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUListEx value. + + Returns: + A MRUListEx value generator, which returns the MRU index number + and entry value. 
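+
+      For example, MRUListEx data 02 00 00 00 00 00 00 00 ff ff ff ff
+      yields (0, 2) and (1, 0), followed by (2, 0xffffffff), the list
+      terminator on which callers break.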
+    """
+    mru_list_value = key.GetValue('MRUListEx')
+    if not mru_list_value:
+      return enumerate([])
+
+    try:
+      mru_list = self._MRULISTEX_STRUCT.parse(mru_list_value.data)
+    except construct.FieldError:
+      logging.warning(u'[{0:s}] Unable to parse the MRU key: {1:s}'.format(
+          self.name, key.path))
+      return enumerate([])
+
+    return enumerate(mru_list)
+
+  def _ParseSubKey(
+      self, parser_context, key, parent_value_string, registry_type=None,
+      file_entry=None, parser_chain=None, codepage='cp1252'):
+    """Extract event objects from a MRUListEx Registry key.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: the Registry key (instance of winreg.WinRegKey).
+      parent_value_string: string containing the parent value string.
+      registry_type: Optional Registry type string. The default is None.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+        The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+        point. The default is None.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+    """
+    text_dict = {}
+    value_strings = {}
+    for index, entry_number in self._ParseMRUListExValue(key):
+      # TODO: detect if list ends prematurely.
+      # MRU lists are terminated with 0xffffffff (-1).
+      if entry_number == 0xffffffff:
+        break
+
+      self._ParseMRUListExEntryValue(
+          parser_context, key, index, entry_number, text_dict, value_strings,
+          parent_value_string, codepage=codepage, file_entry=file_entry,
+          parser_chain=parser_chain)
+
+    event_object = windows_events.WindowsRegistryEvent(
+        key.last_written_timestamp, key.path, text_dict,
+        offset=key.offset, registry_type=registry_type, urls=self.URLS,
+        source_append=': BagMRU')
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+    for index, entry_number in self._ParseMRUListExValue(key):
+      # TODO: detect if list ends prematurely.
+      # MRU lists are terminated with 0xffffffff (-1).
+      if entry_number == 0xffffffff:
+        break
+
+      sub_key = key.GetSubkey(u'{0:d}'.format(entry_number))
+      if not sub_key:
+        logging.debug(
+            u'[{0:s}] Missing BagMRU sub key: {1:d} in key: {2:s}.'.format(
+                self.name, entry_number, key.path))
+        continue
+
+      value_string = value_strings.get(entry_number, u'')
+      self._ParseSubKey(
+          parser_context, sub_key, value_string, file_entry=file_entry,
+          parser_chain=parser_chain, codepage=codepage)
+
+  def GetEntries(
+      self, parser_context, key=None, registry_type=None, codepage='cp1252',
+      file_entry=None, parser_chain=None, **unused_kwargs):
+    """Extract event objects from a Registry key containing a MRUListEx value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: Optional Registry key (instance of winreg.WinRegKey).
+        The default is None.
+      registry_type: Optional Registry type string. The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+        point. The default is None.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+        The default is None.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+ """ + self._ParseSubKey( + parser_context, key, u'', registry_type=registry_type, + codepage=codepage, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(BagMRUPlugin) diff --git a/plaso/parsers/winreg_plugins/bagmru_test.py b/plaso/parsers/winreg_plugins/bagmru_test.py new file mode 100644 index 0000000..90ae118 --- /dev/null +++ b/plaso/parsers/winreg_plugins/bagmru_test.py @@ -0,0 +1,99 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the BagMRU Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import bagmru +from plaso.parsers.winreg_plugins import test_lib + + +class TestBagMRUPlugin(test_lib.RegistryPluginTestCase): + """Tests for the BagMRU plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = bagmru.BagMRUPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER.DAT']) + key_path = ( + u'\\Software\\Microsoft\\Windows\\ShellNoRoam\\BagMRU') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 15) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
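+    # When invoked through the Windows Registry parser, the parser chain
+    # would contain the name of the winreg parser as well.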
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-08-04 15:19:16.997750') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'Index: 1 [MRU Value 0]: ' + u'Shell item list: [My Computer]').format(key_path) + + expected_msg_short = ( + u'[{0:s}] Index: 1 [MRU Value 0]: Shel...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-08-04 15:19:10.669625') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}\\0] ' + u'Index: 1 [MRU Value 0]: ' + u'Shell item list: [My Computer, C:\\]').format(key_path) + + expected_msg_short = ( + u'[{0:s}\\0] Index: 1 [MRU Value 0]: Sh...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[14] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-08-04 15:19:16.997750') + self.assertEquals(event_object.timestamp, expected_timestamp) + + # The winreg_formatter will add a space after the key path even when there + # is not text. + expected_msg = u'[{0:s}\\0\\0\\0\\0\\0] '.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/ccleaner.py b/plaso/parsers/winreg_plugins/ccleaner.py new file mode 100644 index 0000000..de4eab9 --- /dev/null +++ b/plaso/parsers/winreg_plugins/ccleaner.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parser for the CCleaner Registry key.""" + +from plaso.events import windows_events +from plaso.lib import timelib +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'Marc Seguin (segumarc@gmail.com)' + + +class CCleanerPlugin(interface.KeyPlugin): + """Gathers the CCleaner Keys for NTUSER hive.""" + + NAME = 'winreg_ccleaner' + DESCRIPTION = u'Parser for CCleaner Registry data.' + + REG_KEYS = [u'\\Software\\Piriform\\CCleaner'] + REG_TYPE = 'NTUSER' + + URLS = [(u'http://cheeky4n6monkey.blogspot.com/2012/02/writing-ccleaner' + u'-regripper-plugin-part_05.html')] + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Extracts event objects from a CCleaner Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). 
+ The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + for value in key.GetValues(): + if not value.name or not value.data: + continue + + text_dict = {} + text_dict[value.name] = value.data + + if value.name == u'UpdateKey': + timestamp = timelib.Timestamp.FromTimeString( + value.data, timezone=parser_context.timezone) + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type) + + elif value.name == '0': + event_object = windows_events.WindowsRegistryEvent( + key.timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type) + + else: + # TODO: change this event not to set a timestamp of 0. + event_object = windows_events.WindowsRegistryEvent( + 0, key.path, text_dict, offset=key.offset, + registry_type=registry_type) + + event_object.source_append = u': CCleaner Registry key' + + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(CCleanerPlugin) diff --git a/plaso/parsers/winreg_plugins/ccleaner_test.py b/plaso/parsers/winreg_plugins/ccleaner_test.py new file mode 100644 index 0000000..432bbb2 --- /dev/null +++ b/plaso/parsers/winreg_plugins/ccleaner_test.py @@ -0,0 +1,83 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the CCleaner Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import ccleaner +from plaso.parsers.winreg_plugins import test_lib + + +__author__ = 'Marc Seguin (segumarc@gmail.com)' + + +class CCleanerRegistryPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the CCleaner Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = ccleaner.CCleanerPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-CCLEANER.DAT']) + key_path = u'\\Software\\Piriform\\CCleaner' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 17) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2013-07-13 10:03:14') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'UpdateKey' + expected_value = u'07/13/2013 10:03:14 AM' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_string = u'[{0:s}] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + event_object = event_objects[2] + + self.assertEquals(event_object.timestamp, 0) + + regvalue_identifier = u'(App)Delete Index.dat files' + expected_value = u'True' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_string = u'[{0:s}] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/default.py b/plaso/parsers/winreg_plugins/default.py new file mode 100644 index 0000000..a8a0e49 --- /dev/null +++ b/plaso/parsers/winreg_plugins/default.py @@ -0,0 +1,120 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The default Windows Registry plugin.""" + +from plaso.events import windows_events +from plaso.lib import utils +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class DefaultPlugin(interface.KeyPlugin): + """Default plugin that extracts minimum information from every registry key. + + The default plugin will parse every registry key that is passed to it and + extract minimum information, such as a list of available values and if + possible content of those values. The timestamp used is the timestamp + when the registry key was last modified. + """ + + NAME = 'winreg_default' + DESCRIPTION = u'Parser for Registry data.' + + REG_TYPE = 'any' + REG_KEYS = [] + + # This is a special case, plugins normally never overwrite the priority. + # However the default plugin should only run when all others plugins have + # tried and failed. + WEIGHT = 3 + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Returns an event object based on a Registry key name and values. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + """ + text_dict = {} + + if key.number_of_values == 0: + text_dict[u'Value'] = u'No values stored in key.' 
+ + else: + for value in key.GetValues(): + if not value.name: + value_name = '(default)' + else: + value_name = u'{0:s}'.format(value.name) + + if value.data is None: + value_string = u'[{0:s}] Empty'.format( + value.data_type_string) + elif value.DataIsString(): + string_decode = utils.GetUnicodeString(value.data) + value_string = u'[{0:s}] {1:s}'.format( + value.data_type_string, string_decode) + elif value.DataIsInteger(): + value_string = u'[{0:s}] {1:d}'.format( + value.data_type_string, value.data) + elif value.DataIsMultiString(): + if type(value.data) not in (list, tuple): + value_string = u'[{0:s}]'.format(value.data_type_string) + # TODO: Add a flag or some sort of an anomaly alert. + else: + value_string = u'[{0:s}] {1:s}'.format( + value.data_type_string, u''.join(value.data)) + else: + value_string = u'[{0:s}]'.format(value.data_type_string) + + text_dict[value_name] = value_string + + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, + offset=key.offset, registry_type=registry_type) + + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + # Even though the DefaultPlugin is derived from KeyPlugin it needs to + # overwrite the Process function to make sure it is called when no other + # plugin is available. + + def Process( + self, parser_context, key=None, registry_type=None, + parser_chain=None, **kwargs): + """Process the key and return a generator to extract event objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + """ + # Note that we should NOT call the Process function of the KeyPlugin here. + parser_chain = self._BuildParserChain(parser_chain) + self.GetEntries( + parser_context, key=key, registry_type=registry_type, + parser_chain=parser_chain, **kwargs) + + +winreg.WinRegistryParser.RegisterPlugin(DefaultPlugin) diff --git a/plaso/parsers/winreg_plugins/default_test.py b/plaso/parsers/winreg_plugins/default_test.py new file mode 100644 index 0000000..23a4b70 --- /dev/null +++ b/plaso/parsers/winreg_plugins/default_test.py @@ -0,0 +1,79 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
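
The type dispatch in DefaultPlugin.GetEntries above boils down to a small
mapping from value type to display string. A condensed sketch of the same
branching, using a hypothetical stub object instead of plaso's Registry value
interface:

def render_value(value):
  """Returns a display string for a Registry value, DefaultPlugin-style."""
  if value.data is None:
    return u'[{0:s}] Empty'.format(value.data_type_string)
  if value.DataIsString():
    return u'[{0:s}] {1:s}'.format(value.data_type_string, value.data)
  if value.DataIsInteger():
    return u'[{0:s}] {1:d}'.format(value.data_type_string, value.data)
  if value.DataIsMultiString():
    if not isinstance(value.data, (list, tuple)):
      # Mirrors the TODO in the plugin: a multi string whose data is not
      # a sequence is anomalous, so only the type is reported.
      return u'[{0:s}]'.format(value.data_type_string)
    return u'[{0:s}] {1:s}'.format(
        value.data_type_string, u''.join(value.data))
  # Fall-through, e.g. REG_BINARY: only the data type is shown.
  return u'[{0:s}]'.format(value.data_type_string)

class StubValue(object):
  """Hypothetical stand-in for a Windows Registry value object."""

  def __init__(self, data, data_type_string, kind):
    self.data = data
    self.data_type_string = data_type_string
    self._kind = kind

  def DataIsString(self):
    return self._kind == 'string'

  def DataIsInteger(self):
    return self._kind == 'integer'

  def DataIsMultiString(self):
    return self._kind == 'multi'

print(render_value(StubValue(u'acb', u'REG_SZ', 'string')))
print(render_value(StubValue(1200, u'REG_DWORD', 'integer')))
print(render_value(StubValue(None, u'REG_BINARY', 'binary')))
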
+"""Tests for the default Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.parsers.winreg_plugins import default +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import test_lib as winreg_test_lib + + +class TestDefaultRegistry(test_lib.RegistryPluginTestCase): + """Tests for the default Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = default.DefaultPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Microsoft\\Some Windows\\InterestingApp\\MRU' + values = [] + values.append(winreg_test_lib.TestRegValue( + 'MRUList', 'acb'.encode('utf_16_le'), 1, 123)) + values.append(winreg_test_lib.TestRegValue( + 'a', 'Some random text here'.encode('utf_16_le'), 1, 1892)) + values.append(winreg_test_lib.TestRegValue( + 'b', 'c:/evil.exe'.encode('utf_16_le'), 3, 612)) + values.append(winreg_test_lib.TestRegValue( + 'c', 'C:/looks_legit.exe'.encode('utf_16_le'), 1, 1001)) + + winreg_key = winreg_test_lib.TestRegKey( + key_path, 1346145829002031, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.timestamp, 1346145829002031) + + expected_msg = ( + u'[{0:s}] ' + u'MRUList: [REG_SZ] acb ' + u'a: [REG_SZ] Some random text here ' + u'b: [REG_BINARY] ' + u'c: [REG_SZ] C:/looks_legit.exe').format(key_path) + + expected_msg_short = ( + u'[{0:s}] MRUList: [REG_SZ] acb a: [REG_SZ...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/interface.py b/plaso/parsers/winreg_plugins/interface.py new file mode 100644 index 0000000..dd23953 --- /dev/null +++ b/plaso/parsers/winreg_plugins/interface.py @@ -0,0 +1,263 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The Windows Registry plugin objects interface.""" + +import abc +import logging + +from plaso.parsers import plugins +from plaso.winreg import path_expander as winreg_path_expander + + +class RegistryPlugin(plugins.BasePlugin): + """Class that defines the Windows Registry plugin object interface.""" + + __abstract = True + + NAME = 'winreg' + DESCRIPTION = u'Parser for Registry data.' + + # Indicate the type of hive this plugin belongs to (eg. NTUSER, SOFTWARE). 
REG_TYPE = 'any'
+
+  # URLS should contain a list of URLs with additional information about this
+  # key or value.
+  URLS = []
+
+  # WEIGHT is a simple integer value representing the priority of this plugin.
+  # The weight can be used by some parser implementation to prioritize the
+  # order in which plugins are run against the Windows Registry keys.
+  # By default a Windows Registry plugin should not overwrite this value;
+  # it should only be overwritten in interfaces extending the base class,
+  # providing a higher level of prioritization to Windows Registry plugins.
+  WEIGHT = 3
+
+  def __init__(self, reg_cache=None):
+    """Initializes Windows Registry plugin object.
+
+    Args:
+      reg_cache: Optional Windows Registry objects cache (instance of
+        WinRegistryCache). The default is None.
+    """
+    super(RegistryPlugin, self).__init__()
+    # TODO: Clean this up, this value is stored but not used.
+    self._reg_cache = reg_cache
+
+  @abc.abstractmethod
+  def GetEntries(
+      self, parser_context, file_entry=None, key=None, registry_type=None,
+      parser_chain=None, codepage='cp1252', **kwargs):
+    """Extracts event objects from the Windows Registry key.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      file_entry: optional file entry object (instance of dfvfs.FileEntry).
+        The default is None.
+      key: Optional Registry key (instance of winreg.WinRegKey).
+        The default is None.
+      registry_type: Optional Registry type. The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+        point. The default is None.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+    """
+
+  def Process(self, parser_context, parser_chain=None, key=None, **kwargs):
+    """Processes a Windows Registry key or value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      parser_chain: Optional string containing the parsing chain up to this
+        point. The default is None.
+      key: Optional Registry key (instance of winreg.WinRegKey).
+        The default is None.
+
+    Raises:
+      ValueError: If the key value is not set.
+    """
+    if key is None:
+      raise ValueError(u'Key is not set.')
+
+    del kwargs['file_entry']
+    del kwargs['registry_type']
+    del kwargs['codepage']
+
+    # This will raise if unhandled keyword arguments are passed.
+    super(RegistryPlugin, self).Process(parser_context, parser_chain, **kwargs)
+
+
+class KeyPlugin(RegistryPlugin):
+  """Class that defines the Windows Registry key-based plugin interface."""
+
+  __abstract = True
+
+  # A list of all the Windows Registry key paths this plugin supports.
+  # Each of these key paths can contain a path that needs to be expanded,
+  # such as {current_control_set}, etc.
+  REG_KEYS = []
+
+  WEIGHT = 1
+
+  def __init__(self, reg_cache=None):
+    """Initializes key-based Windows Registry plugin object.
+
+    Args:
+      reg_cache: Optional Windows Registry objects cache (instance of
+        WinRegistryCache). The default is None.
+    """
+    super(KeyPlugin, self).__init__(reg_cache=reg_cache)
+    self._path_expander = winreg_path_expander.WinRegistryKeyPathExpander(
+        reg_cache=reg_cache)
+    self.expanded_keys = None
+
+  def ExpandKeys(self, parser_context):
+    """Builds a list of expanded keys this plugin supports.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+    """
+    self.expanded_keys = []
+    for registry_key in self.REG_KEYS:
+      expanded_key = u''
+      try:
+        # TODO: deprecate direct use of pre_obj.
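+        # The path expander resolves placeholders such as
+        # {current_control_set} using values that preprocessing collected
+        # into the Registry cache and the knowledge base.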
+ expanded_key = self._path_expander.ExpandPath( + registry_key, pre_obj=parser_context.knowledge_base.pre_obj) + except KeyError as exception: + logging.debug(( + u'Unable to expand Registry key {0:s} for plugin {1:s} with ' + u'error: {2:s}').format(registry_key, self.NAME, exception)) + continue + + if not expanded_key: + continue + + self.expanded_keys.append(expanded_key) + + # Special case of Wow6432 Windows Registry redirection. + # URL: http://msdn.microsoft.com/en-us/library/windows/desktop/\ + # ms724072%28v=vs.85%29.aspx + if expanded_key.startswith('\\Software'): + _, first, second = expanded_key.partition('\\Software') + self.expanded_keys.append(u'{0:s}\\Wow6432Node{1:s}'.format( + first, second)) + + if self.REG_TYPE == 'SOFTWARE' or self.REG_TYPE == 'any': + self.expanded_keys.append(u'\\Wow6432Node{0:s}'.format(expanded_key)) + + @abc.abstractmethod + def GetEntries( + self, parser_context, file_entry=None, key=None, registry_type=None, + codepage='cp1252', parser_chain=None, **kwargs): + """Extracts event objects from the Windows Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + """ + + def Process( + self, parser_context, file_entry=None, key=None, registry_type=None, + codepage='cp1252', parser_chain=None, **kwargs): + """Processes a Windows Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + if self.expanded_keys is None: + self.ExpandKeys(parser_context) + + parser_chain = self._BuildParserChain(parser_chain) + + super(KeyPlugin, self).Process( + parser_context, file_entry=file_entry, key=key, + registry_type=registry_type, codepage=codepage, + parser_chain=parser_chain, **kwargs) + + if key and key.path in self.expanded_keys: + self.GetEntries( + parser_context, file_entry=file_entry, key=key, + registry_type=registry_type, codepage=codepage, + parser_chain=parser_chain, **kwargs) + + +class ValuePlugin(RegistryPlugin): + """Class that defines the Windows Registry value-based plugin interface.""" + + __abstract = True + + # REG_VALUES should be defined as a frozenset. + REG_VALUES = frozenset() + + WEIGHT = 2 + + @abc.abstractmethod + def GetEntries( + self, parser_context, file_entry=None, key=None, registry_type=None, + parser_chain=None, codepage='cp1252', **kwargs): + """Extracts event objects from the Windows Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. 
+ """ + + def Process( + self, parser_context, file_entry=None, key=None, registry_type=None, + parser_chain=None, codepage='cp1252', **kwargs): + """Processes a Windows Registry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + """ + + parser_chain = self._BuildParserChain(parser_chain) + + super(ValuePlugin, self).Process( + parser_context, file_entry=file_entry, key=key, + registry_type=registry_type, codepage=codepage, **kwargs) + + values = frozenset([val.name for val in key.GetValues()]) + if self.REG_VALUES.issubset(values): + self.GetEntries( + parser_context, file_entry=file_entry, key=key, + registry_type=registry_type, parser_chain=parser_chain, + codepage=codepage, **kwargs) diff --git a/plaso/parsers/winreg_plugins/lfu.py b/plaso/parsers/winreg_plugins/lfu.py new file mode 100644 index 0000000..2ba9a37 --- /dev/null +++ b/plaso/parsers/winreg_plugins/lfu.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plug-in to collect the Less Frequently Used Keys.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class BootVerificationPlugin(interface.KeyPlugin): + """Plug-in to collect the Boot Verification Key.""" + + NAME = 'winreg_boot_verify' + DESCRIPTION = u'Parser for Boot Verification Registry data.' + + REG_TYPE = 'SYSTEM' + REG_KEYS = [u'\\{current_control_set}\\Control\\BootVerificationProgram'] + + URLS = ['http://technet.microsoft.com/en-us/library/cc782537(v=ws.10).aspx'] + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Gather the BootVerification key values and return one event for all. + + This key is rare, so its presence is suspect. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. 
+ """ + text_dict = {} + for value in key.GetValues(): + text_dict[value.name] = value.data + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, urls=self.URLS) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class BootExecutePlugin(interface.KeyPlugin): + """Plug-in to collect the BootExecute Value from the Session Manager key.""" + + NAME = 'winreg_boot_execute' + DESCRIPTION = u'Parser for Boot Execution Registry data.' + + REG_TYPE = 'SYSTEM' + REG_KEYS = [u'\\{current_control_set}\\Control\\Session Manager'] + + URLS = ['http://technet.microsoft.com/en-us/library/cc963230.aspx'] + + def GetEntries( + self, parser_context, file_entry=None, key=None, registry_type=None, + parser_chain=None, **unused_kwargs): + """Gather the BootExecute Value, compare to default, return event. + + The rest of the values in the Session Manager key are in a separate event. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + text_dict = {} + + for value in key.GetValues(): + if value.name == 'BootExecute': + # MSDN: claims that the data type of this value is REG_BINARY + # although REG_MULTI_SZ is known to be used as well. + if value.DataIsString(): + value_string = value.data + elif value.DataIsMultiString(): + value_string = u''.join(value.data) + elif value.DataIsBinaryData(): + value_string = value.data + else: + value_string = u'' + error_string = ( + u'Key: {0:s}, value: {1:s}: unsupported value data type: ' + u'{2:s}.').format(key.path, value.name, value.data_type_string) + parser_context.ProduceParseError( + self.NAME, error_string, file_entry=file_entry) + + # TODO: why does this have a separate event object? Remove this. + value_dict = {'BootExecute': value_string} + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, value_dict, offset=key.offset, + registry_type=registry_type, urls=self.URLS) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + else: + text_dict[value.name] = value.data + + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, urls=self.URLS) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugins([ + BootVerificationPlugin, BootExecutePlugin]) diff --git a/plaso/parsers/winreg_plugins/lfu_test.py b/plaso/parsers/winreg_plugins/lfu_test.py new file mode 100644 index 0000000..b141c2d --- /dev/null +++ b/plaso/parsers/winreg_plugins/lfu_test.py @@ -0,0 +1,155 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Less Frequently Used (LFU) Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import lfu +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import cache +from plaso.winreg import test_lib as winreg_test_lib + + +class TestBootExecutePlugin(test_lib.RegistryPluginTestCase): + """Tests for the LFU BootExecute Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + registry_cache = cache.WinRegistryCache() + registry_cache.attributes['current_control_set'] = 'ControlSet001' + self._plugin = lfu.BootExecutePlugin(reg_cache=registry_cache) + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\ControlSet001\\Control\\Session Manager' + values = [] + + values.append(winreg_test_lib.TestRegValue( + 'BootExecute', 'autocheck autochk *\x00'.encode('utf_16_le'), 7, 123)) + values.append(winreg_test_lib.TestRegValue( + 'CriticalSectionTimeout', '2592000'.encode('utf_16_le'), 1, 153)) + values.append(winreg_test_lib.TestRegValue( + 'ExcludeFromKnownDlls', '\x00'.encode('utf_16_le'), 7, 163)) + values.append(winreg_test_lib.TestRegValue( + 'GlobalFlag', '0'.encode('utf_16_le'), 1, 173)) + values.append(winreg_test_lib.TestRegValue( + 'HeapDeCommitFreeBlockThreshold', '0'.encode('utf_16_le'), 1, 183)) + values.append(winreg_test_lib.TestRegValue( + 'HeapDeCommitTotalFreeThreshold', '0'.encode('utf_16_le'), 1, 203)) + values.append(winreg_test_lib.TestRegValue( + 'HeapSegmentCommit', '0'.encode('utf_16_le'), 1, 213)) + values.append(winreg_test_lib.TestRegValue( + 'HeapSegmentReserve', '0'.encode('utf_16_le'), 1, 223)) + values.append(winreg_test_lib.TestRegValue( + 'NumberOfInitialSessions', '2'.encode('utf_16_le'), 1, 243)) + + timestamp = timelib_test.CopyStringToTimestamp('2012-08-31 20:45:29') + winreg_key = winreg_test_lib.TestRegKey(key_path, timestamp, values, 153) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-31 20:45:29') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_string = ( + u'[{0:s}] BootExecute: autocheck autochk *').format(key_path) + + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + event_object = event_objects[1] + + expected_msg = ( + u'[{0:s}] ' + u'CriticalSectionTimeout: 2592000 ' + u'ExcludeFromKnownDlls: [] ' + u'GlobalFlag: 0 ' + u'HeapDeCommitFreeBlockThreshold: 0 ' + u'HeapDeCommitTotalFreeThreshold: 0 ' + u'HeapSegmentCommit: 0 ' + u'HeapSegmentReserve: 0 ' + u'NumberOfInitialSessions: 2').format(key_path) + + expected_msg_short = ( + u'[{0:s}] CriticalSectionTimeout: 2592000 Excl...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class TestBootVerificationRegistry(test_lib.RegistryPluginTestCase): + """Tests for the LFU BootVerification Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + registry_cache = cache.WinRegistryCache() + registry_cache.attributes['current_control_set'] = 'ControlSet001' + self._plugin = lfu.BootVerificationPlugin(reg_cache=registry_cache) + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\ControlSet001\\Control\\BootVerificationProgram' + values = [] + + values.append(winreg_test_lib.TestRegValue( + 'ImagePath', + 'C:\\WINDOWS\\system32\\googleupdater.exe'.encode('utf_16_le'), 1, + 123)) + + timestamp = timelib_test.CopyStringToTimestamp('2012-08-31 20:45:29') + winreg_key = winreg_test_lib.TestRegKey(key_path, timestamp, values, 153) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-31 20:45:29') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'ImagePath: C:\\WINDOWS\\system32\\googleupdater.exe').format( + key_path) + + expected_msg_short = ( + u'[{0:s}] ImagePath: C:\\WINDOWS\\system...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/mountpoints.py b/plaso/parsers/winreg_plugins/mountpoints.py new file mode 100644 index 0000000..ae800d7 --- /dev/null +++ b/plaso/parsers/winreg_plugins/mountpoints.py @@ -0,0 +1,88 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the MountPoints2 plugin.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class MountPoints2Plugin(interface.KeyPlugin): + """Windows Registry plugin for parsing the MountPoints2 key.""" + + NAME = 'winreg_mountpoints2' + DESCRIPTION = u'Parser for mount points Registry data.' + + REG_TYPE = 'NTUSER' + + REG_KEYS = [ + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\' + u'MountPoints2')] + + URLS = [u'http://support.microsoft.com/kb/932463'] + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Retrieves information from the MountPoints2 registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + """ + for subkey in key.GetSubkeys(): + name = subkey.name + if not name: + continue + + text_dict = {} + text_dict[u'Volume'] = name + + # Get the label if it exists. + label_value = subkey.GetValue('_LabelFromReg') + if label_value: + text_dict[u'Label'] = label_value.data + + if name.startswith('{'): + text_dict[u'Type'] = u'Volume' + + elif name.startswith('#'): + # The format is: ##Server_Name#Share_Name. + text_dict[u'Type'] = u'Remote Drive' + server_name, _, share_name = name[2:].partition('#') + text_dict[u'Remote_Server'] = server_name + text_dict[u'Share_Name'] = u'\\{0:s}'.format( + share_name.replace(u'#', u'\\')) + + else: + text_dict[u'Type'] = u'Drive' + + event_object = windows_events.WindowsRegistryEvent( + subkey.last_written_timestamp, key.path, text_dict, + offset=subkey.offset, registry_type=registry_type, urls=self.URLS) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(MountPoints2Plugin) diff --git a/plaso/parsers/winreg_plugins/mountpoints_test.py b/plaso/parsers/winreg_plugins/mountpoints_test.py new file mode 100644 index 0000000..c4db38f --- /dev/null +++ b/plaso/parsers/winreg_plugins/mountpoints_test.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
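
The subkey names under MountPoints2 encode the mount point type, as the
branching above shows: GUID-style names are volumes, names starting with ##
are remote drives in ##Server_Name#Share_Name form, and anything else is
treated as a local drive. The same classification as a standalone sketch (the
helper function is illustrative, not part of the plugin):

def classify_mount_point(name):
  """Returns a text dictionary describing a MountPoints2 subkey name."""
  text_dict = {u'Volume': name}
  if name.startswith(u'{'):
    text_dict[u'Type'] = u'Volume'
  elif name.startswith(u'#'):
    # The format is: ##Server_Name#Share_Name.
    text_dict[u'Type'] = u'Remote Drive'
    server_name, _, share_name = name[2:].partition(u'#')
    text_dict[u'Remote_Server'] = server_name
    text_dict[u'Share_Name'] = u'\\{0:s}'.format(
        share_name.replace(u'#', u'\\'))
  else:
    text_dict[u'Type'] = u'Drive'
  return text_dict

# Matches the expectations in the test below:
# Remote_Server: controller, Share_Name: \home\nfury.
print(classify_mount_point(u'##controller#home#nfury'))
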
+"""Tests for the MountPoints2 Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import mountpoints +from plaso.parsers.winreg_plugins import test_lib + + +class MountPoints2PluginTest(test_lib.RegistryPluginTestCase): + """Tests for the MountPoints2 Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mountpoints.MountPoints2Plugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = self._plugin.REG_KEYS[0] + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-08-23 17:10:14.960960') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue = event_object.regvalue + self.assertEquals(regvalue.get('Share_Name'), r'\home\nfury') + + expected_string = ( + u'[{0:s}] Label: Home Drive Remote_Server: controller Share_Name: ' + u'\\home\\nfury Type: Remote Drive Volume: ' + u'##controller#home#nfury').format(key_path) + expected_string_short = u'{0:s}...'.format(expected_string[0:77]) + + self._TestGetMessageStrings( + event_object, expected_string, expected_string_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/mrulist.py b/plaso/parsers/winreg_plugins/mrulist.py new file mode 100644 index 0000000..61556c6 --- /dev/null +++ b/plaso/parsers/winreg_plugins/mrulist.py @@ -0,0 +1,308 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a MRUList Registry plugin.""" + +import abc +import logging + +import construct + +from plaso.events import windows_events +from plaso.lib import binary +from plaso.parsers import winreg +from plaso.parsers.shared import shell_items +from plaso.parsers.winreg_plugins import interface + + +# A mixin class is used here to not to have the duplicate functionality +# to parse the MRUList Registry values. However multiple inheritance +# and thus mixins are to be used sparsely in this codebase, hence we need +# to find a better solution in not needing to distinguish between key and +# value plugins. 
+# TODO: refactor Registry key and value plugin to rid ourselves of the mixin. +class MRUListPluginMixin(object): + """Class for common MRUList Windows Registry plugin functionality.""" + + _MRULIST_STRUCT = construct.Range(1, 500, construct.ULInt16('entry_letter')) + + @abc.abstractmethod + def _ParseMRUListEntryValue( + self, parser_context, key, entry_index, entry_letter, file_entry=None, + parser_chain=None, **kwargs): + """Parses the MRUList entry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUList value. + entry_index: integer value representing the MRUList entry index. + entry_letter: character value representing the entry. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Returns: + A string containing the value. + """ + + def _ParseMRUListValue(self, key): + """Parses the MRUList value in a given Registry key. + + Args: + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUList value. + + Returns: + A MRUList value generator, which returns the MRU index number + and entry value. + """ + mru_list_value = key.GetValue('MRUList') + + # The key exists but does not contain a value named "MRUList". + if not mru_list_value: + return enumerate([]) + + try: + mru_list = self._MRULIST_STRUCT.parse(mru_list_value.raw_data) + except construct.FieldError: + logging.warning(u'[{0:s}] Unable to parse the MRU key: {1:s}'.format( + self.NAME, key.path)) + return enumerate([]) + + return enumerate(mru_list) + + def _ParseMRUListKey( + self, parser_context, key, registry_type=None, file_entry=None, + parser_chain=None, codepage='cp1252'): + """Extract event objects from a MRUList Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey). + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + text_dict = {} + for entry_index, entry_letter in self._ParseMRUListValue(key): + # TODO: detect if list ends prematurely. + # MRU lists are terminated with \0 (0x0000). + if entry_letter == 0: + break + + entry_letter = chr(entry_letter) + + value_string = self._ParseMRUListEntryValue( + parser_context, key, entry_index, entry_letter, + codepage=codepage, file_entry=file_entry, parser_chain=parser_chain) + + value_text = u'Index: {0:d} [MRU Value {1:s}]'.format( + entry_index + 1, entry_letter) + + text_dict[value_text] = value_string + + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, + offset=key.offset, registry_type=registry_type, + source_append=': MRU List') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class MRUListStringPlugin(interface.ValuePlugin, MRUListPluginMixin): + """Windows Registry plugin to parse a string MRUList.""" + + NAME = 'winreg_mrulist_string' + DESCRIPTION = u'Parser for Most Recently Used (MRU) Registry data.' 
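+
+  # As a value-based plugin this triggers on any key that contains both a
+  # 'MRUList' value and a value named 'a' (see ValuePlugin.Process).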
+ + REG_TYPE = 'any' + REG_VALUES = frozenset(['MRUList', 'a']) + URLS = [u'http://forensicartifacts.com/tag/mru/'] + + def _ParseMRUListEntryValue( + self, parser_context, key, entry_index, entry_letter, **unused_kwargs): + """Parses the MRUList entry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUList value. + entry_index: integer value representing the MRUList entry index. + entry_letter: character value representing the entry. + + Returns: + A string containing the value. + """ + value_string = u'' + + value = key.GetValue(u'{0:s}'.format(entry_letter)) + if value is None: + logging.debug( + u'[{0:s}] Missing MRUList entry value: {1:s} in key: {2:s}.'.format( + self.NAME, entry_letter, key.path)) + + elif value.DataIsString(): + value_string = value.data + + elif value.DataIsBinaryData(): + logging.debug(( + u'[{0:s}] Non-string MRUList entry value: {1:s} parsed as string ' + u'in key: {2:s}.').format(self.NAME, entry_letter, key.path)) + utf16_stream = binary.ByteStreamCopyToUtf16Stream(value.data) + + try: + value_string = utf16_stream.decode('utf-16-le') + except UnicodeDecodeError as exception: + value_string = binary.HexifyBuffer(utf16_stream) + logging.warning(( + u'[{0:s}] Unable to decode UTF-16 stream: {1:s} in MRUList entry ' + u'value: {2:s} in key: {3:s} with error: {4:s}').format( + self.NAME, value_string, entry_letter, key.path, exception)) + + return value_string + + def GetEntries( + self, parser_context, file_entry=None, key=None, registry_type=None, + parser_chain=None, codepage='cp1252', **unused_kwargs): + """Extracts event objects from a MRU list. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + """ + self._ParseMRUListKey( + parser_context, key, registry_type=registry_type, + parser_chain=parser_chain, file_entry=file_entry, codepage=codepage) + + def Process( + self, parser_context, file_entry=None, key=None, registry_type=None, + codepage='cp1252', parser_chain=None, **kwargs): + """Determine if we can process this Registry key or not. + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + # Prevent this plugin triggering on sub paths of non-string MRUList values. 
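+    # The DesktopStreamMRU key stores shell item lists, which are handled
+    # by MRUListShellItemListPlugin below.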
+ if u'Explorer\\DesktopStreamMRU' in key.path: + return + + super(MRUListStringPlugin, self).Process( + parser_context, file_entry=file_entry, key=key, + registry_type=registry_type, codepage=codepage, + parser_chain=parser_chain, **kwargs) + + +class MRUListShellItemListPlugin(interface.KeyPlugin, MRUListPluginMixin): + """Windows Registry plugin to parse a shell item list MRUList.""" + + NAME = 'winreg_mrulist_shell_item_list' + DESCRIPTION = u'Parser for Most Recently Used (MRU) Registry data.' + + REG_TYPE = 'any' + REG_KEYS = frozenset([ + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\' + u'DesktopStreamMRU')]) + + URLS = [u'https://github.com/libyal/winreg-kb/wiki/MRU-keys'] + + def _ParseMRUListEntryValue( + self, parser_context, key, entry_index, entry_letter, codepage='cp1252', + file_entry=None, parser_chain=None, **unused_kwargs): + """Parses the MRUList entry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUList value. + entry_index: integer value representing the MRUList entry index. + entry_letter: character value representing the entry. + codepage: Optional extended ASCII string codepage. The default is cp1252. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Returns: + A string containing the value. + """ + value_string = u'' + + value = key.GetValue(u'{0:s}'.format(entry_letter)) + if value is None: + logging.debug( + u'[{0:s}] Missing MRUList entry value: {1:s} in key: {2:s}.'.format( + self.NAME, entry_letter, key.path)) + + elif not value.DataIsBinaryData(): + logging.debug(( + u'[{0:s}] Non-binary MRUList entry value: {1:s} in key: ' + u'{2:s}.').format(self.NAME, entry_letter, key.path)) + + elif value.data: + shell_items_parser = shell_items.ShellItemsParser(key.path) + shell_items_parser.Parse( + parser_context, value.data, codepage=codepage, file_entry=file_entry, + parser_chain=parser_chain) + + value_string = u'Shell item list: [{0:s}]'.format( + shell_items_parser.CopyToPath()) + + return value_string + + def GetEntries( + self, parser_context, key=None, registry_type=None, codepage='cp1252', + file_entry=None, parser_chain=None, **unused_kwargs): + """Extract event objects from a Registry key containing a MRUList value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. 
+ """ + self._ParseMRUListKey( + parser_context, key, registry_type=registry_type, codepage=codepage, + parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugins([ + MRUListStringPlugin, MRUListShellItemListPlugin]) diff --git a/plaso/parsers/winreg_plugins/mrulist_test.py b/plaso/parsers/winreg_plugins/mrulist_test.py new file mode 100644 index 0000000..2de69ba --- /dev/null +++ b/plaso/parsers/winreg_plugins/mrulist_test.py @@ -0,0 +1,171 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the MRUList Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import mrulist +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import test_lib as winreg_test_lib + + +class TestMRUListStringPlugin(test_lib.RegistryPluginTestCase): + """Tests for the string MRUList plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mrulist.MRUListStringPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Microsoft\\Some Windows\\InterestingApp\\MRU' + values = [] + + values.append(winreg_test_lib.TestRegValue( + 'MRUList', 'acb'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=123)) + values.append(winreg_test_lib.TestRegValue( + 'a', 'Some random text here'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=1892)) + values.append(winreg_test_lib.TestRegValue( + 'b', 'c:/evil.exe'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_BINARY, offset=612)) + values.append(winreg_test_lib.TestRegValue( + 'c', 'C:/looks_legit.exe'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=1001)) + + timestamp = timelib_test.CopyStringToTimestamp('2012-08-28 09:23:49.002031') + winreg_key = winreg_test_lib.TestRegKey( + key_path, timestamp, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'Index: 1 [MRU Value a]: Some random text here ' + u'Index: 2 [MRU Value c]: C:/looks_legit.exe ' + u'Index: 3 [MRU Value b]: c:/evil.exe').format(key_path) + + expected_msg_short = ( + u'[{0:s}] Index: 1 [MRU Value a]: Some ran...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class TestMRUListShellItemListPlugin(test_lib.RegistryPluginTestCase): + """Tests for the shell item list MRUList plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mrulist.MRUListShellItemListPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\' + u'DesktopStreamMRU') + values = [] + + data = ''.join(map(chr, [ + 0x14, 0x00, 0x1f, 0x00, 0xe0, 0x4f, 0xd0, 0x20, 0xea, 0x3a, 0x69, 0x10, + 0xa2, 0xd8, 0x08, 0x00, 0x2b, 0x30, 0x30, 0x9d, 0x19, 0x00, 0x23, 0x43, + 0x3a, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0xee, 0x15, 0x00, 0x31, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3e, 0x7a, 0x60, 0x10, 0x80, 0x57, + 0x69, 0x6e, 0x6e, 0x74, 0x00, 0x00, 0x18, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x2e, 0x3e, 0xe4, 0x62, 0x10, 0x00, 0x50, 0x72, 0x6f, 0x66, + 0x69, 0x6c, 0x65, 0x73, 0x00, 0x00, 0x25, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x2e, 0x3e, 0xe4, 0x62, 0x10, 0x00, 0x41, 0x64, 0x6d, 0x69, + 0x6e, 0x69, 0x73, 0x74, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x00, 0x41, 0x44, + 0x4d, 0x49, 0x4e, 0x49, 0x7e, 0x31, 0x00, 0x17, 0x00, 0x31, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x2e, 0x3e, 0xe4, 0x62, 0x10, 0x00, 0x44, 0x65, 0x73, + 0x6b, 0x74, 0x6f, 0x70, 0x00, 0x00, 0x00, 0x00])) + + values.append(winreg_test_lib.TestRegValue( + 'MRUList', 'a'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=123)) + values.append(winreg_test_lib.TestRegValue( + 'a', data, winreg_test_lib.TestRegValue.REG_BINARY, offset=612)) + + timestamp = timelib_test.CopyStringToTimestamp('2012-08-28 09:23:49.002031') + winreg_key = winreg_test_lib.TestRegKey( + key_path, timestamp, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + # A MRUList event object. + event_object = event_objects[4] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'Index: 1 [MRU Value a]: Shell item list: ' + u'[My Computer, C:\\, Winnt, Profiles, Administrator, Desktop]').format( + key_path) + + expected_msg_short = u'[{0:s}] Index:...'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # A shell item event object. 
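+    # Note that ShellItemsParser produced additional event objects for the
+    # individual shell items, which is why there are 5 event objects in
+    # total and why event_objects[0] below is not the MRUList event.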
+    event_object = event_objects[0]
+
+    expected_timestamp = timelib_test.CopyStringToTimestamp(
+        '2011-01-14 12:03:52')
+    self.assertEquals(event_object.timestamp, expected_timestamp)
+
+    expected_msg = (
+        u'Name: Winnt '
+        u'Origin: {0:s}').format(key_path)
+
+    expected_msg_short = (
+        u'Name: Winnt '
+        u'Origin: \\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\'
+        u'Deskt...')
+
+    self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/winreg_plugins/mrulistex.py b/plaso/parsers/winreg_plugins/mrulistex.py
new file mode 100644
index 0000000..21e26c0
--- /dev/null
+++ b/plaso/parsers/winreg_plugins/mrulistex.py
@@ -0,0 +1,528 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains MRUListEx Windows Registry plugins."""
+
+import abc
+import logging
+
+import construct
+
+from plaso.events import windows_events
+from plaso.lib import binary
+from plaso.parsers import winreg
+from plaso.parsers.shared import shell_items
+from plaso.parsers.winreg_plugins import interface
+
+
+# A mixin class is used here to avoid duplicating the functionality needed
+# to parse the MRUListEx Registry values. However, multiple inheritance, and
+# thus mixins, are to be used sparingly in this codebase, hence we need to
+# find a better solution that removes the need to distinguish between key
+# and value plugins.
+# TODO: refactor Registry key and value plugin to rid ourselves of the mixin.
+class MRUListExPluginMixin(object):
+  """Class for common MRUListEx Windows Registry plugin functionality."""
+
+  _MRULISTEX_STRUCT = construct.Range(1, 500, construct.ULInt32('entry_number'))
+
+  @abc.abstractmethod
+  def _ParseMRUListExEntryValue(
+      self, parser_context, key, entry_index, entry_number, **kwargs):
+    """Parses the MRUListEx entry value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: the Registry key (instance of winreg.WinRegKey) that contains
+           the MRUListEx value.
+      entry_index: integer value representing the MRUListEx entry index.
+      entry_number: integer value representing the entry number.
+
+    Returns:
+      A string containing the value.
+    """
+
+  def _ParseMRUListExValue(self, key):
+    """Parses the MRUListEx value in a given Registry key.
+
+    Args:
+      key: the Registry key (instance of winreg.WinRegKey) that contains
+           the MRUListEx value.
+
+    Returns:
+      A MRUListEx value generator, which returns the MRU index number
+      and entry value.
+    """
+    mru_list_value = key.GetValue('MRUListEx')
+
+    # The key exists but does not contain a value named "MRUListEx".
+ if not mru_list_value: + return enumerate([]) + + try: + mru_list = self._MRULISTEX_STRUCT.parse(mru_list_value.data) + except construct.FieldError: + logging.warning(u'[{0:s}] Unable to parse the MRU key: {1:s}'.format( + self.NAME, key.path)) + return enumerate([]) + + return enumerate(mru_list) + + def _ParseMRUListExKey( + self, parser_context, key, registry_type=None, codepage='cp1252', + file_entry=None, parser_chain=None): + """Extract event objects from a MRUListEx Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey). + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + """ + text_dict = {} + for entry_index, entry_number in self._ParseMRUListExValue(key): + # TODO: detect if list ends prematurely. + # MRU lists are terminated with 0xffffffff (-1). + if entry_number == 0xffffffff: + break + + value_string = self._ParseMRUListExEntryValue( + parser_context, key, entry_index, entry_number, + codepage=codepage, file_entry=file_entry, parser_chain=parser_chain) + + value_text = u'Index: {0:d} [MRU Value {1:d}]'.format( + entry_index + 1, entry_number) + + text_dict[value_text] = value_string + + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, + offset=key.offset, registry_type=registry_type, + source_append=': MRUListEx') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class MRUListExStringPlugin(interface.ValuePlugin, MRUListExPluginMixin): + """Windows Registry plugin to parse a string MRUListEx.""" + + NAME = 'winreg_mrulistex_string' + DESCRIPTION = u'Parser for Most Recently Used (MRU) Registry data.' + + REG_TYPE = 'any' + REG_VALUES = frozenset(['MRUListEx', '0']) + + URLS = [ + u'http://forensicartifacts.com/2011/02/recentdocs/', + u'https://github.com/libyal/winreg-kb/wiki/MRU-keys'] + + _STRING_STRUCT = construct.Struct( + 'string_and_shell_item', + construct.RepeatUntil( + lambda obj, ctx: obj == '\x00\x00', construct.Field('string', 2))) + + def _ParseMRUListExEntryValue( + self, parser_context, key, entry_index, entry_number, **unused_kwargs): + """Parses the MRUListEx entry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUListEx value. + entry_index: integer value representing the MRUListEx entry index. + entry_number: integer value representing the entry number. + + Returns: + A string containing the value. 
+    """
+    value_string = u''
+
+    value = key.GetValue(u'{0:d}'.format(entry_number))
+    if value is None:
+      logging.debug(
+          u'[{0:s}] Missing MRUListEx entry value: {1:d} in key: {2:s}.'.format(
+              self.NAME, entry_number, key.path))
+
+    elif value.DataIsString():
+      value_string = value.data
+
+    elif value.DataIsBinaryData():
+      logging.debug((
+          u'[{0:s}] Non-string MRUListEx entry value: {1:d} parsed as string '
+          u'in key: {2:s}.').format(self.NAME, entry_number, key.path))
+      utf16_stream = binary.ByteStreamCopyToUtf16Stream(value.data)
+
+      try:
+        value_string = utf16_stream.decode('utf-16-le')
+      except UnicodeDecodeError as exception:
+        value_string = binary.HexifyBuffer(utf16_stream)
+        logging.warning((
+            u'[{0:s}] Unable to decode UTF-16 stream: {1:s} in MRUListEx entry '
+            u'value: {2:d} in key: {3:s} with error: {4:s}').format(
+                self.NAME, value_string, entry_number, key.path, exception))
+
+    return value_string
+
+  def GetEntries(
+      self, parser_context, key=None, registry_type=None, codepage='cp1252',
+      file_entry=None, parser_chain=None, **unused_kwargs):
+    """Extract event objects from a Registry key containing a MRUListEx value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: Optional Registry key (instance of winreg.WinRegKey).
+           The default is None.
+      registry_type: Optional Registry type string. The default is None.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+    """
+    self._ParseMRUListExKey(
+        parser_context, key, registry_type=registry_type, codepage=codepage,
+        parser_chain=parser_chain, file_entry=file_entry)
+
+  def Process(self, parser_context, key=None, codepage='cp1252', **kwargs):
+    """Determine if we can process this Registry key or not.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: A Windows Registry key (instance of WinRegKey).
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+    """
+    # Prevent this plugin triggering on sub paths of non-string MRUListEx
+    # values.
+    if (u'BagMRU' in key.path or u'Explorer\\StreamMRU' in key.path or
+        u'\\Explorer\\ComDlg32\\OpenSavePidlMRU' in key.path):
+      return
+
+    super(MRUListExStringPlugin, self).Process(
+        parser_context, key=key, codepage=codepage, **kwargs)
+
+
+class MRUListExShellItemListPlugin(interface.KeyPlugin, MRUListExPluginMixin):
+  """Windows Registry plugin to parse a shell item list MRUListEx."""
+
+  NAME = 'winreg_mrulistex_shell_item_list'
+  DESCRIPTION = u'Parser for Most Recently Used (MRU) Registry data.'
+
+  REG_TYPE = 'any'
+  REG_KEYS = frozenset([
+      # The OpenSavePidlMRU subkey names are a file extension (e.g. .jpg)
+      # or '*'; since REG_KEYS does not support wildcards the subkeys are
+      # parsed separately in GetEntries.
+      (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\ComDlg32\\'
+       u'OpenSavePidlMRU'),
+      u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\StreamMRU'])
+
+  def _ParseMRUListExEntryValue(
+      self, parser_context, key, entry_index, entry_number, codepage='cp1252',
+      file_entry=None, parser_chain=None, **unused_kwargs):
+    """Parses the MRUListEx entry value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: the Registry key (instance of winreg.WinRegKey) that contains
+           the MRUListEx value.
+      entry_index: integer value representing the MRUListEx entry index.
+ entry_number: integer value representing the entry number. + codepage: Optional extended ASCII string codepage. The default is cp1252. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Returns: + A string containing the value. + """ + value_string = u'' + + value = key.GetValue(u'{0:d}'.format(entry_number)) + if value is None: + logging.debug( + u'[{0:s}] Missing MRUListEx entry value: {1:d} in key: {2:s}.'.format( + self.NAME, entry_number, key.path)) + + elif not value.DataIsBinaryData(): + logging.debug(( + u'[{0:s}] Non-binary MRUListEx entry value: {1:d} in key: ' + u'{2:s}.').format(self.NAME, entry_number, key.path)) + + elif value.data: + shell_items_parser = shell_items.ShellItemsParser(key.path) + shell_items_parser.Parse( + parser_context, value.data, codepage=codepage, file_entry=file_entry, + parser_chain=parser_chain) + + value_string = u'Shell item list: [{0:s}]'.format( + shell_items_parser.CopyToPath()) + + return value_string + + def GetEntries( + self, parser_context, key=None, registry_type=None, codepage='cp1252', + file_entry=None, parser_chain=None, **unused_kwargs): + """Extract event objects from a Registry key containing a MRUListEx value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + codepage: Optional extended ASCII string codepage. The default is cp1252. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + """ + if key.name != u'OpenSavePidlMRU': + self._ParseMRUListExKey( + parser_context, key, registry_type=registry_type, codepage=codepage, + parser_chain=parser_chain, file_entry=file_entry) + + if key.name == u'OpenSavePidlMRU': + # For the OpenSavePidlMRU MRUListEx we also need to parse its subkeys + # since the Registry key path does not support wildcards yet. + for subkey in key.GetSubkeys(): + self._ParseMRUListExKey( + parser_context, subkey, registry_type=registry_type, + codepage=codepage, parser_chain=parser_chain, file_entry=file_entry) + + +class MRUListExStringAndShellItemPlugin( + interface.KeyPlugin, MRUListExPluginMixin): + """Windows Registry plugin to parse a string and shell item MRUListEx.""" + + NAME = 'winreg_mrulistex_string_and_shell_item' + DESCRIPTION = u'Parser for Most Recently Used (MRU) Registry data.' + + REG_TYPE = 'any' + REG_KEYS = frozenset([ + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RecentDocs']) + + _STRING_AND_SHELL_ITEM_STRUCT = construct.Struct( + 'string_and_shell_item', + construct.RepeatUntil( + lambda obj, ctx: obj == '\x00\x00', construct.Field('string', 2)), + construct.Anchor('shell_item')) + + def _ParseMRUListExEntryValue( + self, parser_context, key, entry_index, entry_number, codepage='cp1252', + file_entry=None, parser_chain=None, **unused_kwargs): + """Parses the MRUListEx entry value. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: the Registry key (instance of winreg.WinRegKey) that contains + the MRUListEx value. + entry_index: integer value representing the MRUListEx entry index. + entry_number: integer value representing the entry number. 
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Returns:
+      A string containing the value.
+    """
+    value_string = u''
+
+    value = key.GetValue(u'{0:d}'.format(entry_number))
+    if value is None:
+      logging.debug(
+          u'[{0:s}] Missing MRUListEx entry value: {1:d} in key: {2:s}.'.format(
+              self.NAME, entry_number, key.path))
+
+    elif not value.DataIsBinaryData():
+      logging.debug((
+          u'[{0:s}] Non-binary MRUListEx entry value: {1:d} in key: '
+          u'{2:s}.').format(self.NAME, entry_number, key.path))
+
+    elif value.data:
+      value_struct = self._STRING_AND_SHELL_ITEM_STRUCT.parse(value.data)
+
+      try:
+        # The struct includes the end-of-string character that we need
+        # to strip off.
+        path = ''.join(value_struct.string).decode('utf16')[:-1]
+      except UnicodeDecodeError as exception:
+        logging.warning((
+            u'[{0:s}] Unable to decode string MRUListEx entry value: {1:d} '
+            u'in key: {2:s} with error: {3:s}').format(
+                self.NAME, entry_number, key.path, exception))
+        path = u''
+
+      if path:
+        shell_item_list_data = value.data[value_struct.shell_item:]
+        if not shell_item_list_data:
+          logging.debug((
+              u'[{0:s}] Missing shell item in MRUListEx entry value: {1:d} '
+              u'in key: {2:s}.').format(self.NAME, entry_number, key.path))
+          value_string = u'Path: {0:s}'.format(path)
+
+        else:
+          shell_items_parser = shell_items.ShellItemsParser(key.path)
+          shell_items_parser.Parse(
+              parser_context, shell_item_list_data, codepage=codepage,
+              parser_chain=parser_chain, file_entry=file_entry)
+
+          value_string = u'Path: {0:s}, Shell item: [{1:s}]'.format(
+              path, shell_items_parser.CopyToPath())
+
+    return value_string
+
+  def GetEntries(
+      self, parser_context, key=None, registry_type=None, codepage='cp1252',
+      file_entry=None, parser_chain=None, **unused_kwargs):
+    """Extract event objects from a Registry key containing a MRUListEx value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: Optional Registry key (instance of winreg.WinRegKey).
+           The default is None.
+      registry_type: Optional Registry type string. The default is None.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+    """
+    self._ParseMRUListExKey(
+        parser_context, key, registry_type=registry_type, codepage=codepage,
+        parser_chain=parser_chain, file_entry=file_entry)
+
+    if key.name == u'RecentDocs':
+      # For the RecentDocs MRUListEx we also need to parse its subkeys
+      # since the Registry key path does not support wildcards yet.
+      for subkey in key.GetSubkeys():
+        self._ParseMRUListExKey(
+            parser_context, subkey, registry_type=registry_type,
+            codepage=codepage, parser_chain=parser_chain, file_entry=file_entry)
+
+
+class MRUListExStringAndShellItemListPlugin(
+    interface.KeyPlugin, MRUListExPluginMixin):
+  """Windows Registry plugin to parse a string and shell item list MRUListEx."""
+
+  NAME = 'winreg_mrulistex_string_and_shell_item_list'
+  DESCRIPTION = u'Parser for Most Recently Used (MRU) Registry data.'
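# --- Editor's note: illustrative sketch, not part of this commit. ---
# How the string-and-shell-item structs used by the plugin above and the
# one defined below split an entry value: RepeatUntil collects 2-byte units
# up to and including the UTF-16 end-of-string character, and Anchor
# records the stream offset where the shell item data begins. The value
# data is hypothetical; assumes construct 2.x.
import construct

STRING_AND_SHELL_ITEM_STRUCT = construct.Struct(
    'string_and_shell_item',
    construct.RepeatUntil(
        lambda obj, ctx: obj == '\x00\x00', construct.Field('string', 2)),
    construct.Anchor('shell_item'))

value_data = 'a\x00b\x00\x00\x00\xab\xcd'
value_struct = STRING_AND_SHELL_ITEM_STRUCT.parse(value_data)

# As in _ParseMRUListExEntryValue: join the 2-byte units, decode and strip
# the end-of-string character, then slice off the trailing shell item data.
path = ''.join(value_struct.string).decode('utf16')[:-1]
shell_item_data = value_data[value_struct.shell_item:]

print repr(path)             # Prints: u'ab'
print repr(shell_item_data)  # Prints: '\xab\xcd'
# --- End editor's note. ---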
+
+  REG_TYPE = 'any'
+  REG_KEYS = frozenset([
+      (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\ComDlg32\\'
+       u'LastVisitedPidlMRU')])
+
+  _STRING_AND_SHELL_ITEM_LIST_STRUCT = construct.Struct(
+      'string_and_shell_item',
+      construct.RepeatUntil(
+          lambda obj, ctx: obj == '\x00\x00', construct.Field('string', 2)),
+      construct.Anchor('shell_item_list'))
+
+  def _ParseMRUListExEntryValue(
+      self, parser_context, key, entry_index, entry_number, codepage='cp1252',
+      file_entry=None, parser_chain=None, **unused_kwargs):
+    """Parses the MRUListEx entry value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: the Registry key (instance of winreg.WinRegKey) that contains
+           the MRUListEx value.
+      entry_index: integer value representing the MRUListEx entry index.
+      entry_number: integer value representing the entry number.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+
+    Returns:
+      A string containing the value.
+    """
+    value_string = u''
+
+    value = key.GetValue(u'{0:d}'.format(entry_number))
+    if value is None:
+      logging.debug(
+          u'[{0:s}] Missing MRUListEx entry value: {1:d} in key: {2:s}.'.format(
+              self.NAME, entry_number, key.path))
+
+    elif not value.DataIsBinaryData():
+      logging.debug((
+          u'[{0:s}] Non-binary MRUListEx entry value: {1:d} in key: '
+          u'{2:s}.').format(self.NAME, entry_number, key.path))
+
+    elif value.data:
+      value_struct = self._STRING_AND_SHELL_ITEM_LIST_STRUCT.parse(value.data)
+
+      try:
+        # The struct includes the end-of-string character that we need
+        # to strip off.
+        path = ''.join(value_struct.string).decode('utf16')[:-1]
+      except UnicodeDecodeError as exception:
+        logging.warning((
+            u'[{0:s}] Unable to decode string MRUListEx entry value: {1:d} '
+            u'in key: {2:s} with error: {3:s}').format(
+                self.NAME, entry_number, key.path, exception))
+        path = u''
+
+      if path:
+        shell_item_list_data = value.data[value_struct.shell_item_list:]
+        if not shell_item_list_data:
+          logging.debug((
+              u'[{0:s}] Missing shell item in MRUListEx entry value: {1:d} '
+              u'in key: {2:s}.').format(self.NAME, entry_number, key.path))
+          value_string = u'Path: {0:s}'.format(path)
+
+        else:
+          shell_items_parser = shell_items.ShellItemsParser(key.path)
+          shell_items_parser.Parse(
+              parser_context, shell_item_list_data, codepage=codepage,
+              parser_chain=parser_chain, file_entry=file_entry)
+
+          value_string = u'Path: {0:s}, Shell item list: [{1:s}]'.format(
+              path, shell_items_parser.CopyToPath())
+
+    return value_string
+
+  def GetEntries(
+      self, parser_context, key=None, registry_type=None, codepage='cp1252',
+      file_entry=None, parser_chain=None, **unused_kwargs):
+    """Extract event objects from a Registry key containing a MRUListEx value.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: Optional Registry key (instance of winreg.WinRegKey).
+           The default is None.
+      registry_type: Optional Registry type string. The default is None.
+      codepage: Optional extended ASCII string codepage. The default is cp1252.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+                  The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+                    point. The default is None.
+ """ + self._ParseMRUListExKey( + parser_context, key, registry_type=registry_type, codepage=codepage, + parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugins([ + MRUListExStringPlugin, MRUListExShellItemListPlugin, + MRUListExStringAndShellItemPlugin, MRUListExStringAndShellItemListPlugin]) diff --git a/plaso/parsers/winreg_plugins/mrulistex_test.py b/plaso/parsers/winreg_plugins/mrulistex_test.py new file mode 100644 index 0000000..28231e5 --- /dev/null +++ b/plaso/parsers/winreg_plugins/mrulistex_test.py @@ -0,0 +1,303 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the MRUListEx Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import mrulistex +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import interface as winreg_interface +from plaso.winreg import test_lib as winreg_test_lib + + +class TestMRUListExStringPlugin(test_lib.RegistryPluginTestCase): + """Tests for the string MRUListEx plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mrulistex.MRUListExStringPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Microsoft\\Some Windows\\InterestingApp\\MRUlist' + values = [] + + # The order is: 201 + values.append(winreg_test_lib.TestRegValue( + 'MRUListEx', '\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00', + winreg_interface.WinRegValue.REG_BINARY, 123)) + values.append(winreg_test_lib.TestRegValue( + '0', 'Some random text here'.encode('utf_16_le'), + winreg_interface.WinRegValue.REG_SZ, 1892)) + values.append(winreg_test_lib.TestRegValue( + '1', 'c:\\evil.exe'.encode('utf_16_le'), + winreg_interface.WinRegValue.REG_BINARY, 612)) + values.append(winreg_test_lib.TestRegValue( + '2', 'C:\\looks_legit.exe'.encode('utf_16_le'), + winreg_interface.WinRegValue.REG_SZ, 1001)) + + winreg_key = winreg_test_lib.TestRegKey( + key_path, 1346145829002031, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + # A MRUListEx event object. + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'Index: 1 [MRU Value 2]: C:\\looks_legit.exe ' + u'Index: 2 [MRU Value 0]: Some random text here ' + u'Index: 3 [MRU Value 1]: c:\\evil.exe').format(key_path) + + expected_msg_short = ( + u'[{0:s}] Index: 1 [MRU Value 2]: C:\\l...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class TestMRUListExShellItemListPlugin(test_lib.RegistryPluginTestCase): + """Tests for the shell item list MRUListEx plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mrulistex.MRUListExShellItemListPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\ComDlg32\\' + u'OpenSavePidlMRU') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 65) + + # A MRUListEx event object. + event_object = event_objects[40] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-08-28 22:48:28.159308') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}\\exe] ' + u'Index: 1 [MRU Value 1]: Shell item list: [My Computer, P:\\, ' + u'Application Tools, Firefox 6.0, Firefox Setup 6.0.exe] ' + u'Index: 2 [MRU Value 0]: Shell item list: [Computers and Devices, ' + u'UNKNOWN: 0x00, \\\\controller\\WebDavShare, Firefox Setup 3.6.12.exe' + u']').format(key_path) + + expected_msg_short = ( + u'[\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\ComDlg32\\' + u'OpenSavePidlMRU...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + # A shell item event object. 
+ event_object = event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-08 22:16:02') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'Name: ALLOYR~1 ' + u'Long name: Alloy Research ' + u'NTFS file reference: 44518-33 ' + u'Origin: {0:s}\\*').format(key_path) + + expected_msg_short = ( + u'Name: ALLOYR~1 ' + u'NTFS file reference: 44518-33 ' + u'Origin: \\Software\\Microsoft\\Wind...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class TestMRUListExStringAndShellItemPlugin(test_lib.RegistryPluginTestCase): + """Tests for the string and shell item MRUListEx plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mrulistex.MRUListExStringAndShellItemPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RecentDocs') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 6) + + # A MRUListEx event object. + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-01 13:52:39.113741') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'Index: 1 [MRU Value 17]: Path: The SHIELD, ' + u'Shell item: [The SHIELD.lnk] ' + u'Index: 10 [MRU Value 11]: Path: 5031RR_BalancedLeadership.pdf, ' + u'Shell item: [5031RR_BalancedLeadership.lnk] ' + u'Index: 11 [MRU Value 10]: ' + u'Path: SA-23E Mitchell-Hyundyne Starfury.docx, ' + u'Shell item: [SA-23E Mitchell-Hyundyne Starfury.lnk] ' + u'Index: 12 [MRU Value 9]: Path: StarFury.docx, ' + u'Shell item: [StarFury (3).lnk] ' + u'Index: 13 [MRU Value 6]: Path: StarFury.zip, ' + u'Shell item: [StarFury.lnk] ' + u'Index: 14 [MRU Value 4]: Path: VIBRANIUM.docx, ' + u'Shell item: [VIBRANIUM.lnk] ' + u'Index: 15 [MRU Value 5]: Path: ADAMANTIUM-Background.docx, ' + u'Shell item: [ADAMANTIUM-Background.lnk] ' + u'Index: 16 [MRU Value 3]: Path: Pictures, ' + u'Shell item: [Pictures.lnk] ' + u'Index: 17 [MRU Value 2]: Path: nick_fury_77831.jpg, ' + u'Shell item: [nick_fury_77831.lnk] ' + u'Index: 18 [MRU Value 1]: Path: Downloads, ' + u'Shell item: [Downloads.lnk] ' + u'Index: 19 [MRU Value 0]: Path: wallpaper_medium.jpg, ' + u'Shell item: [wallpaper_medium.lnk] ' + u'Index: 2 [MRU Value 18]: ' + u'Path: captain_america_shield_by_almogrem-d48x9x8.jpg, ' + u'Shell item: [captain_america_shield_by_almogrem-d48x9x8.lnk] ' + u'Index: 3 [MRU Value 16]: Path: captain-america-shield-front.jpg, ' + u'Shell item: [captain-america-shield-front.lnk] ' + u'Index: 4 [MRU Value 12]: Path: Leadership, ' + u'Shell item: [Leadership.lnk] ' + u'Index: 5 [MRU Value 15]: Path: followership.pdf, ' + u'Shell item: [followership.lnk] ' + u'Index: 6 [MRU Value 14]: Path: leaderqualities.pdf, ' + u'Shell item: [leaderqualities.lnk] ' + u'Index: 7 [MRU Value 13]: Path: htlhtl.pdf, ' + u'Shell item: 
[htlhtl.lnk] ' + u'Index: 8 [MRU Value 8]: Path: StarFury, ' + u'Shell item: [StarFury (2).lnk] ' + u'Index: 9 [MRU Value 7]: Path: Earth_SA-26_Thunderbolt.jpg, ' + u'Shell item: [Earth_SA-26_Thunderbolt.lnk]').format(key_path) + + expected_msg_short = ( + u'[{0:s}] Index: 1 [MR...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class TestMRUListExStringAndShellItemListPlugin( + test_lib.RegistryPluginTestCase): + """Tests for the string and shell item list MRUListEx plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = mrulistex.MRUListExStringAndShellItemListPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\ComDlg32\\' + u'LastVisitedPidlMRU') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 31) + + # A MRUListEx event object. + event_object = event_objects[30] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-01 13:52:38.966290') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'Index: 1 [MRU Value 1]: Path: chrome.exe, ' + u'Shell item list: [Users Libraries, UNKNOWN: 0x00, UNKNOWN: 0x00, ' + u'UNKNOWN: 0x00] ' + u'Index: 2 [MRU Value 7]: ' + u'Path: {{48E1ED6B-CF49-4609-B1C1-C082BFC3D0B4}}, ' + u'Shell item list: [Shared Documents Folder (Users Files), ' + u'UNKNOWN: 0x00, Alloy Research] ' + u'Index: 3 [MRU Value 6]: ' + u'Path: {{427865A0-03AF-4F25-82EE-10B6CB1DED3E}}, ' + u'Shell item list: [Users Libraries, UNKNOWN: 0x00, UNKNOWN: 0x00] ' + u'Index: 4 [MRU Value 5]: ' + u'Path: {{24B5C9BB-48B5-47FF-8343-40481DBA1E2B}}, ' + u'Shell item list: [My Computer, C:\\, Users, nfury, Documents] ' + u'Index: 5 [MRU Value 4]: ' + u'Path: {{0B8CFE96-DB69-4D33-8E3C-36EAB4F709E0}}, ' + u'Shell item list: [My Computer, C:\\, Users, nfury, Documents, ' + u'Alloy Research] ' + u'Index: 6 [MRU Value 3]: ' + u'Path: {{D4F85F66-003D-4127-BCE9-CAD7A57B2857}}, ' + u'Shell item list: [Users Libraries, UNKNOWN: 0x00, UNKNOWN: 0x00] ' + u'Index: 7 [MRU Value 0]: Path: iexplore.exe, ' + u'Shell item list: [My Computer, P:\\, Application Tools, Firefox 6.0] ' + u'Index: 8 [MRU Value 2]: Path: Skype.exe, ' + u'Shell item list: [Users Libraries, UNKNOWN: 0x00]').format(key_path) + + expected_msg_short = ( + u'[\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\ComDlg32\\' + u'LastVisitedPidl...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/msie_zones.py b/plaso/parsers/winreg_plugins/msie_zones.py new file mode 100644 index 0000000..211f051 --- /dev/null +++ b/plaso/parsers/winreg_plugins/msie_zones.py @@ -0,0 +1,292 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. 
+# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the MSIE zone settings plugin.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'Elizabeth Schweinsberg (beth@bethlogic.net)' + + +class MsieZoneSettingsPlugin(interface.KeyPlugin): + """Windows Registry plugin for parsing the MSIE Zones settings.""" + + NAME = 'winreg_msie_zone' + DESCRIPTION = u'Parser for Internet Explorer zone settings Registry data.' + + REG_TYPE = 'NTUSER' + + REG_KEYS = [ + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Zones'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Lockdown_Zones')] + + URLS = ['http://support.microsoft.com/kb/182569'] + + ZONE_NAMES = { + '0': '0 (My Computer)', + '1': '1 (Local Intranet Zone)', + '2': '2 (Trusted sites Zone)', + '3': '3 (Internet Zone)', + '4': '4 (Restricted Sites Zone)', + '5': '5 (Custom)' + } + + KNOWN_PERMISSIONS_VALUE_NAMES = [ + '1001', '1004', '1200', '1201', '1400', '1402', '1405', '1406', '1407', + '1601', '1604', '1606', '1607', '1608', '1609', '1800', '1802', '1803', + '1804', '1809', '1A04', '2000', '2001', '2004', '2100', '2101', '2102', + '2200', '2201', '2300'] + + CONTROL_VALUES_PERMISSIONS = { + 0x00000000: '0 (Allow)', + 0x00000001: '1 (Prompt User)', + 0x00000003: '3 (Not Allowed)', + 0x00010000: '0x00010000 (Administrator approved)' + } + + CONTROL_VALUES_SAFETY = { + 0x00010000: '0x00010000 (High safety)', + 0x00020000: '0x00020000 (Medium safety)', + 0x00030000: '0x00030000 (Low safety)' + } + + CONTROL_VALUES_1A00 = { + 0x00000000: ('0x00000000 (Automatic logon with current user name and ' + 'password)'), + 0x00010000: '0x00010000 (Prompt for user name and password)', + 0x00020000: '0x00020000 (Automatic logon only in Intranet zone)', + 0x00030000: '0x00030000 (Anonymous logon)' + } + + CONTROL_VALUES_1C00 = { + 0x00000000: '0x00000000 (Disable Java)', + 0x00010000: '0x00010000 (High safety)', + 0x00020000: '0x00020000 (Medium safety)', + 0x00030000: '0x00030000 (Low safety)', + 0x00800000: '0x00800000 (Custom)' + } + + FEATURE_CONTROLS = { + '1200': 'Run ActiveX controls and plug-ins', + '1400': 'Active scripting', + '1001': 'Download signed ActiveX controls', + '1004': 'Download unsigned ActiveX controls', + '1201': 'Initialize and script ActiveX controls not marked as safe', + '1206': 'Allow scripting of IE Web browser control', + '1207': 'Reserved', + '1208': 'Allow previously unused ActiveX controls to run without prompt', + '1209': 'Allow Scriptlets', + '120A': 'Override Per-Site (domain-based) ActiveX restrictions', + '120B': 'Override Per-Site (domain-based) ActiveX restrictions', + '1402': 'Scripting of Java applets', + '1405': 'Script ActiveX controls marked as safe for scripting', + '1406': 'Access data sources across domains', + '1407': 'Allow Programmatic clipboard access', + '1408': 'Reserved', 
+ '1601': 'Submit non-encrypted form data', + '1604': 'Font download', + '1605': 'Run Java', + '1606': 'Userdata persistence', + '1607': 'Navigate sub-frames across different domains', + '1608': 'Allow META REFRESH', + '1609': 'Display mixed content', + '160A': 'Include local directory path when uploading files to a server', + '1800': 'Installation of desktop items', + '1802': 'Drag and drop or copy and paste files', + '1803': 'File Download', + '1804': 'Launching programs and files in an IFRAME', + '1805': 'Launching programs and files in webview', + '1806': 'Launching applications and unsafe files', + '1807': 'Reserved', + '1808': 'Reserved', + '1809': 'Use Pop-up Blocker', + '180A': 'Reserved', + '180B': 'Reserved', + '180C': 'Reserved', + '180D': 'Reserved', + '1A00': 'User Authentication: Logon', + '1A02': 'Allow persistent cookies that are stored on your computer', + '1A03': 'Allow per-session cookies (not stored)', + '1A04': 'Don\'t prompt for client cert selection when no certs exists', + '1A05': 'Allow 3rd party persistent cookies', + '1A06': 'Allow 3rd party session cookies', + '1A10': 'Privacy Settings', + '1C00': 'Java permissions', + '1E05': 'Software channel permissions', + '1F00': 'Reserved', + '2000': 'Binary and script behaviors', + '2001': '.NET: Run components signed with Authenticode', + '2004': '.NET: Run components not signed with Authenticode', + '2100': 'Open files based on content, not file extension', + '2101': 'Web sites in less privileged zone can navigate into this zone', + '2102': ('Allow script initiated windows without size/position ' + 'constraints'), + '2103': 'Allow status bar updates via script', + '2104': 'Allow websites to open windows without address or status bars', + '2105': 'Allow websites to prompt for information using scripted windows', + '2200': 'Automatic prompting for file downloads', + '2201': 'Automatic prompting for ActiveX controls', + '2300': 'Allow web pages to use restricted protocols for active content', + '2301': 'Use Phishing Filter', + '2400': '.NET: XAML browser applications', + '2401': '.NET: XPS documents', + '2402': '.NET: Loose XAML', + '2500': 'Turn on Protected Mode', + '2600': 'Enable .NET Framework setup', + '{AEBA21FA-782A-4A90-978D-B72164C80120}': 'First Party Cookie', + '{A8A88C49-5EB2-4990-A1A2-0876022C854F}': 'Third Party Cookie' + } + + def GetEntries( + self, parser_context, file_entry=None, key=None, registry_type=None, + parser_chain=None, **unused_kwargs): + """Retrieves information of the Internet Settings Zones values. + + The MSIE Feature controls are stored in the Zone specific subkeys in: + Internet Settings\\Zones key + Internet Settings\\Lockdown_Zones key + + Args: + parser_context: A parser context object (instance of ParserContext). + file_entry: optional file entry object (instance of dfvfs.FileEntry). + The default is None. + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. 
+ """ + text_dict = {} + + if key.number_of_values == 0: + error_string = u'Key: {0:s} missing values.'.format(key.path) + parser_context.ProduceParseError( + self.NAME, error_string, file_entry=file_entry) + + else: + for value in key.GetValues(): + if not value.name: + value_name = '(default)' + else: + value_name = u'{0:s}'.format(value.name) + + if value.DataIsString(): + value_string = u'[{0:s}] {1:s}'.format( + value.data_type_string, value.data) + elif value.DataIsInteger(): + value_string = u'[{0:s}] {1:d}'.format( + value.data_type_string, value.data) + elif value.DataIsMultiString(): + value_string = u'[{0:s}] {1:s}'.format( + value.data_type_string, u''.join(value.data)) + else: + value_string = u'[{0:s}]'.format(value.data_type_string) + + text_dict[value_name] = value_string + + # Generate at least one event object for the key. + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, urls=self.URLS) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if key.number_of_subkeys == 0: + error_string = u'Key: {0:s} missing subkeys.'.format(key.path) + parser_context.ProduceParseError( + self.NAME, error_string, file_entry=file_entry) + return + + for zone_key in key.GetSubkeys(): + # TODO: these values are stored in the Description value of the + # zone key. This solution will break on zone values that are larger + # than 5. + path = u'{0:s}\\{1:s}'.format(key.path, self.ZONE_NAMES[zone_key.name]) + + text_dict = {} + + # TODO: this plugin currently just dumps the values and does not + # distinguish between what is a feature control or not. + for value in zone_key.GetValues(): + # Ignore the default value. 
+ if not value.name: + continue + + if value.DataIsString(): + value_string = value.data + + elif value.DataIsInteger(): + if value.name in self.KNOWN_PERMISSIONS_VALUE_NAMES: + value_string = self.CONTROL_VALUES_PERMISSIONS.get( + value.data, u'UNKNOWN') + elif value.name == '1A00': + value_string = self.CONTROL_VALUES_1A00.get(value.data, u'UNKNOWN') + elif value.name == '1C00': + value_string = self.CONTROL_VALUES_1C00.get(value.data, u'UNKNOWN') + elif value.name == '1E05': + value_string = self.CONTROL_VALUES_SAFETY.get( + value.data, u'UNKNOWN') + else: + value_string = u'{0:d}'.format(value.data) + + else: + value_string = u'[{0:s}]'.format(value.data_type_string) + + if len(value.name) == 4 and value.name != 'Icon': + value_description = self.FEATURE_CONTROLS.get(value.name, 'UNKNOWN') + else: + value_description = self.FEATURE_CONTROLS.get(value.name, '') + + if value_description: + feature_control = u'[{0:s}] {1:s}'.format( + value.name, value_description) + else: + feature_control = u'[{0:s}]'.format(value.name) + + text_dict[feature_control] = value_string + + event_object = windows_events.WindowsRegistryEvent( + zone_key.last_written_timestamp, path, text_dict, + offset=zone_key.offset, registry_type=registry_type, + urls=self.URLS) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class MsieZoneSettingsSoftwareZonesPlugin(MsieZoneSettingsPlugin): + """Parses the Zones key in the Software hive.""" + + NAME = 'winreg_msie_zone_software' + + REG_TYPE = 'SOFTWARE' + REG_KEYS = [ + u'\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\Zones', + (u'\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Lockdown_Zones'), + (u'\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Zones'), + (u'\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Lockdown_Zones')] + + +winreg.WinRegistryParser.RegisterPlugins([ + MsieZoneSettingsPlugin, MsieZoneSettingsSoftwareZonesPlugin]) diff --git a/plaso/parsers/winreg_plugins/msie_zones_test.py b/plaso/parsers/winreg_plugins/msie_zones_test.py new file mode 100644 index 0000000..55c9729 --- /dev/null +++ b/plaso/parsers/winreg_plugins/msie_zones_test.py @@ -0,0 +1,384 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
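# --- Editor's note: illustrative sketch, not part of this commit. ---
# How MsieZoneSettingsPlugin (above) renders a zone value: the value name
# selects a feature control description and, for the known permission
# value names, the integer data maps to an access level. DescribeZoneValue
# is a hypothetical helper; the two tables are excerpts of the plugin's
# own lookup tables.
FEATURE_CONTROLS = {'1200': 'Run ActiveX controls and plug-ins'}
CONTROL_VALUES_PERMISSIONS = {
    0x00000000: '0 (Allow)',
    0x00000001: '1 (Prompt User)',
    0x00000003: '3 (Not Allowed)',
    0x00010000: '0x00010000 (Administrator approved)'}


def DescribeZoneValue(name, data):
  """Formats a zone value roughly the way the plugin does."""
  description = FEATURE_CONTROLS.get(name, 'UNKNOWN')
  permission = CONTROL_VALUES_PERMISSIONS.get(data, u'UNKNOWN')
  return u'[{0:s}] {1:s}: {2:s}'.format(name, description, permission)


# Prints: [1200] Run ActiveX controls and plug-ins: 0 (Allow), as seen in
# the expected message strings of the tests below.
print DescribeZoneValue('1200', 0x00000000)
# --- End editor's note. ---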
+"""Tests for the MSIE Zone settings Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import msie_zones +from plaso.parsers.winreg_plugins import test_lib + + +class MsieZoneSettingsSoftwareZonesPluginTest(test_lib.RegistryPluginTestCase): + """Tests for Internet Settings Zones plugin on the Software hive.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = msie_zones.MsieZoneSettingsSoftwareZonesPlugin() + self._test_file = self._GetTestFilePath(['SOFTWARE']) + + def testProcessForZone(self): + """Tests the Process function.""" + key_path = u'\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\Zones' + winreg_key = self._GetKeyFromFile(self._test_file, key_path) + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 6) + + event_object = event_objects[1] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-08-28 21:32:44.937675') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'[1200] Run ActiveX controls and plug-ins' + expected_value = u'0 (Allow)' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = ( + u'[{0:s}\\0 (My Computer)] ' + u'[1001] Download signed ActiveX controls: 0 (Allow) ' + u'[1004] Download unsigned ActiveX controls: 0 (Allow) ' + u'[1200] Run ActiveX controls and plug-ins: 0 (Allow) ' + u'[1201] Initialize and script ActiveX controls not marked as safe: 1 ' + u'(Prompt User) ' + u'[1206] Allow scripting of IE Web browser control: 0 ' + u'[1207] Reserved: 0 ' + u'[1208] Allow previously unused ActiveX controls to run without ' + u'prompt: 0 ' + u'[1209] Allow Scriptlets: 0 ' + u'[120A] Override Per-Site (domain-based) ActiveX restrictions: 0 ' + u'[120B] Override Per-Site (domain-based) ActiveX restrictions: 0 ' + u'[1400] Active scripting: 0 (Allow) ' + u'[1402] Scripting of Java applets: 0 (Allow) ' + u'[1405] Script ActiveX controls marked as safe for scripting: 0 ' + u'(Allow) ' + u'[1406] Access data sources across domains: 0 (Allow) ' + u'[1407] Allow Programmatic clipboard access: 0 (Allow) ' + u'[1408] Reserved: 0 ' + u'[1409] UNKNOWN: 3 ' + u'[1601] Submit non-encrypted form data: 0 (Allow) ' + u'[1604] Font download: 0 (Allow) ' + u'[1605] Run Java: 0 ' + u'[1606] Userdata persistence: 0 (Allow) ' + u'[1607] Navigate sub-frames across different domains: 0 (Allow) ' + u'[1608] Allow META REFRESH: 0 (Allow) ' + u'[1609] Display mixed content: 1 (Prompt User) ' + u'[160A] Include local directory path when uploading files to a ' + u'server: 0 ' + u'[1802] Drag and drop or copy and paste files: 0 (Allow) ' + u'[1803] File Download: 0 (Allow) ' + u'[1804] Launching programs and files in an IFRAME: 0 (Allow) ' + u'[1805] Launching programs and files in webview: 0 ' + u'[1806] Launching applications and unsafe files: 0 ' + u'[1807] Reserved: 0 ' + u'[1808] Reserved: 0 ' + u'[1809] Use Pop-up Blocker: 3 (Not Allowed) ' + u'[180A] Reserved: 0 ' + u'[180C] Reserved: 0 ' + u'[180D] Reserved: 0 ' + u'[180E] UNKNOWN: 0 ' + u'[180F] UNKNOWN: 0 ' + u'[1A00] User 
Authentication: Logon: 0x00000000 (Automatic logon with ' + u'current user name and password) ' + u'[1A02] Allow persistent cookies that are stored on your computer: 0 ' + u'[1A03] Allow per-session cookies (not stored): 0 ' + u'[1A04] Don\'t prompt for client cert selection when no certs exists: ' + u'0 (Allow) ' + u'[1A05] Allow 3rd party persistent cookies: 0 ' + u'[1A06] Allow 3rd party session cookies: 0 ' + u'[1A10] Privacy Settings: 0 ' + u'[1C00] Java permissions: 0x00020000 (Medium safety) ' + u'[2000] Binary and script behaviors: 0 (Allow) ' + u'[2001] .NET: Run components signed with Authenticode: ' + u'3 (Not Allowed) ' + u'[2004] .NET: Run components not signed with Authenticode: ' + u'3 (Not Allowed) ' + u'[2005] UNKNOWN: 0 ' + u'[2007] UNKNOWN: 3 ' + u'[2100] Open files based on content, not file extension: 0 (Allow) ' + u'[2101] Web sites in less privileged zone can navigate into this ' + u'zone: 3 (Not Allowed) ' + u'[2102] Allow script initiated windows without size/position ' + u'constraints: 0 (Allow) ' + u'[2103] Allow status bar updates via script: 0 ' + u'[2104] Allow websites to open windows without address or status ' + u'bars: 0 ' + u'[2105] Allow websites to prompt for information using scripted ' + u'windows: 0 ' + u'[2106] UNKNOWN: 0 ' + u'[2107] UNKNOWN: 0 ' + u'[2200] Automatic prompting for file downloads: 0 (Allow) ' + u'[2201] Automatic prompting for ActiveX controls: 0 (Allow) ' + u'[2300] Allow web pages to use restricted protocols for active ' + u'content: 1 (Prompt User) ' + u'[2301] Use Phishing Filter: 3 ' + u'[2400] .NET: XAML browser applications: 0 ' + u'[2401] .NET: XPS documents: 0 ' + u'[2402] .NET: Loose XAML: 0 ' + u'[2500] Turn on Protected Mode: 3 ' + u'[2600] Enable .NET Framework setup: 0 ' + u'[2700] UNKNOWN: 3 ' + u'[2701] UNKNOWN: 0 ' + u'[2702] UNKNOWN: 3 ' + u'[2703] UNKNOWN: 3 ' + u'[2708] UNKNOWN: 0 ' + u'[2709] UNKNOWN: 0 ' + u'[CurrentLevel]: 0 ' + u'[Description]: Your computer ' + u'[DisplayName]: Computer ' + u'[Flags]: 33 ' + u'[Icon]: shell32.dll#0016 ' + u'[LowIcon]: inetcpl.cpl#005422 ' + u'[PMDisplayName]: Computer ' + u'[Protected Mode]').format(key_path) + + expected_msg_short = u'[{0:s}\\0 (My Computer)] [...'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testProcessForLockDown(self): + """Tests the Process function for the lockdown zone key.""" + key_path = ( + u'\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Lockdown_Zones') + winreg_key = self._GetKeyFromFile(self._test_file, key_path) + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 6) + + event_object = event_objects[1] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-08-28 21:32:44.937675') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'[1200] Run ActiveX controls and plug-ins' + expected_value = u'3 (Not Allowed)' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = ( + u'[{0:s}\\0 (My Computer)] ' + u'[1001] Download signed ActiveX controls: 1 (Prompt User) ' + u'[1004] Download unsigned ActiveX controls: 3 (Not Allowed) ' + u'[1200] Run ActiveX controls and plug-ins: 3 (Not Allowed) ' + u'[1201] Initialize and script ActiveX controls not marked as safe: 3 ' + u'(Not Allowed) ' + u'[1206] Allow scripting of IE Web browser control: 0 ' + u'[1207] Reserved: 3 ' + u'[1208] Allow previously unused ActiveX controls to run without ' + u'prompt: 3 ' + u'[1209] Allow Scriptlets: 3 ' + u'[120A] Override Per-Site (domain-based) ActiveX restrictions: 3 ' + u'[120B] Override Per-Site (domain-based) ActiveX restrictions: 0 ' + u'[1400] Active scripting: 1 (Prompt User) ' + u'[1402] Scripting of Java applets: 0 (Allow) ' + u'[1405] Script ActiveX controls marked as safe for scripting: 0 ' + u'(Allow) ' + u'[1406] Access data sources across domains: 0 (Allow) ' + u'[1407] Allow Programmatic clipboard access: 1 (Prompt User) ' + u'[1408] Reserved: 3 ' + u'[1409] UNKNOWN: 3 ' + u'[1601] Submit non-encrypted form data: 0 (Allow) ' + u'[1604] Font download: 0 (Allow) ' + u'[1605] Run Java: 0 ' + u'[1606] Userdata persistence: 0 (Allow) ' + u'[1607] Navigate sub-frames across different domains: 0 (Allow) ' + u'[1608] Allow META REFRESH: 0 (Allow) ' + u'[1609] Display mixed content: 1 (Prompt User) ' + u'[160A] Include local directory path when uploading files to a ' + u'server: 0 ' + u'[1802] Drag and drop or copy and paste files: 0 (Allow) ' + u'[1803] File Download: 0 (Allow) ' + u'[1804] Launching programs and files in an IFRAME: 0 (Allow) ' + u'[1805] Launching programs and files in webview: 0 ' + u'[1806] Launching applications and unsafe files: 0 ' + u'[1807] Reserved: 0 ' + u'[1808] Reserved: 0 ' + u'[1809] Use Pop-up Blocker: 3 (Not Allowed) ' + u'[180A] Reserved: 0 ' + u'[180C] Reserved: 0 ' + u'[180D] Reserved: 0 ' + u'[180E] UNKNOWN: 0 ' + u'[180F] UNKNOWN: 0 ' + u'[1A00] User Authentication: Logon: 0x00000000 (Automatic logon with ' + u'current user name and password) ' + u'[1A02] Allow persistent cookies that are stored on your computer: 0 ' + u'[1A03] Allow per-session cookies (not stored): 0 ' + u'[1A04] Don\'t prompt for client cert selection when no certs exists: ' + u'3 (Not Allowed) ' + u'[1A05] Allow 3rd party persistent cookies: 0 ' + u'[1A06] Allow 3rd party session cookies: 0 ' + u'[1A10] Privacy Settings: 0 ' + u'[1C00] Java permissions: 0x00000000 (Disable Java) ' + u'[2000] Binary and script behaviors: 0x00010000 ' + u'(Administrator approved) ' + u'[2005] UNKNOWN: 3 ' + u'[2100] Open files based on content, not file extension: 3 ' + u'(Not Allowed) ' + u'[2101] Web sites in less privileged zone can navigate into this ' + u'zone: 3 (Not Allowed) ' + u'[2102] Allow script initiated windows without size/position ' + u'constraints: ' + u'3 (Not Allowed) ' + u'[2103] Allow status bar updates via script: 3 ' + u'[2104] Allow websites to open windows without address or status ' + u'bars: 3 ' + u'[2105] Allow websites to prompt for information using scripted ' + u'windows: 3 ' + u'[2106] UNKNOWN: 3 ' + u'[2107] UNKNOWN: 3 ' + u'[2200] 
Automatic prompting for file downloads: 3 (Not Allowed) ' + u'[2201] Automatic prompting for ActiveX controls: 3 (Not Allowed) ' + u'[2301] Use Phishing Filter: 3 ' + u'[2400] .NET: XAML browser applications: 0 ' + u'[2401] .NET: XPS documents: 0 ' + u'[2402] .NET: Loose XAML: 0 ' + u'[2500] Turn on Protected Mode: 3 ' + u'[2600] Enable .NET Framework setup: 0 ' + u'[2700] UNKNOWN: 3 ' + u'[2701] UNKNOWN: 3 ' + u'[2702] UNKNOWN: 3 ' + u'[2703] UNKNOWN: 3 ' + u'[2708] UNKNOWN: 0 ' + u'[2709] UNKNOWN: 0 ' + u'[CurrentLevel]: 0 ' + u'[Description]: Your computer ' + u'[DisplayName]: Computer ' + u'[Flags]: 33 ' + u'[Icon]: shell32.dll#0016 ' + u'[LowIcon]: inetcpl.cpl#005422 ' + u'[PMDisplayName]: Computer ' + u'[Protected Mode]').format(key_path) + + expected_msg_short = u'[{0:s}\\0 (My Com...'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class MsieZoneSettingsUserZonesPluginTest(test_lib.RegistryPluginTestCase): + """Tests for Internet Settings Zones plugin on the User hive.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = msie_zones.MsieZoneSettingsPlugin() + self._test_file = self._GetTestFilePath(['NTUSER-WIN7.DAT']) + + def testProcessForZone(self): + """Tests the Process function.""" + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Zones') + winreg_key = self._GetKeyFromFile(self._test_file, key_path) + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 6) + + event_object = event_objects[1] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-09-16 21:12:40.145514') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'[1200] Run ActiveX controls and plug-ins' + expected_value = u'0 (Allow)' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = ( + u'[{0:s}\\0 (My Computer)] ' + u'[1200] Run ActiveX controls and plug-ins: 0 (Allow) ' + u'[1400] Active scripting: 0 (Allow) ' + u'[2001] .NET: Run components signed with Authenticode: 3 (Not ' + u'Allowed) ' + u'[2004] .NET: Run components not signed with Authenticode: 3 (Not ' + u'Allowed) ' + u'[2007] UNKNOWN: 3 ' + u'[CurrentLevel]: 0 ' + u'[Description]: Your computer ' + u'[DisplayName]: Computer ' + u'[Flags]: 33 [Icon]: shell32.dll#0016 ' + u'[LowIcon]: inetcpl.cpl#005422 ' + u'[PMDisplayName]: Computer ' + u'[Protected Mode]').format(key_path) + + expected_msg_short = u'[{0:s}\\0 (My Com...'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testProcessForLockDown(self): + """Tests the Process function.""" + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings' + u'\\Lockdown_Zones') + winreg_key = self._GetKeyFromFile(self._test_file, key_path) + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 6) + + event_object = event_objects[1] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-09-16 21:12:40.145514') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'[1200] Run ActiveX controls and plug-ins' + expected_value = u'3 (Not Allowed)' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = ( + u'[{0:s}\\0 (My Computer)] ' + u'[1200] Run ActiveX controls and plug-ins: 3 (Not Allowed) ' + u'[1400] Active scripting: 1 (Prompt User) ' + u'[CurrentLevel]: 0 ' + u'[Description]: Your computer ' + u'[DisplayName]: Computer ' + u'[Flags]: 33 ' + u'[Icon]: shell32.dll#0016 ' + u'[LowIcon]: inetcpl.cpl#005422 ' + u'[PMDisplayName]: Computer ' + u'[Protected Mode]').format(key_path) + + expected_msg_short = u'[{0:s}\\...'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/officemru.py b/plaso/parsers/winreg_plugins/officemru.py new file mode 100644 index 0000000..487133a --- /dev/null +++ b/plaso/parsers/winreg_plugins/officemru.py @@ -0,0 +1,116 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for MS Office MRUs for Plaso.""" + +import logging +import re + +from plaso.events import windows_events +from plaso.lib import timelib +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class OfficeMRUPlugin(interface.KeyPlugin): + """Plugin that parses Microsoft Office MRU keys.""" + + NAME = 'winreg_office_mru' + DESCRIPTION = u'Parser for Microsoft Office MRU Registry data.' + + REG_TYPE = 'NTUSER' + + REG_KEYS = [ + u'\\Software\\Microsoft\\Office\\14.0\\Word\\Place MRU', + u'\\Software\\Microsoft\\Office\\14.0\\Access\\File MRU', + u'\\Software\\Microsoft\\Office\\14.0\\Access\\Place MRU', + u'\\Software\\Microsoft\\Office\\14.0\\PowerPoint\\File MRU', + u'\\Software\\Microsoft\\Office\\14.0\\PowerPoint\\Place MRU', + u'\\Software\\Microsoft\\Office\\14.0\\Excel\\File MRU', + u'\\Software\\Microsoft\\Office\\14.0\\Excel\\Place MRU', + u'\\Software\\Microsoft\\Office\\14.0\\Word\\File MRU'] + + _RE_VALUE_NAME = re.compile(r'^Item [0-9]+$', re.I) + + # The Office 12 item MRU is formatted as: + # [F00000000][T%FILETIME%]*\\%FILENAME% + + # The Office 14 item MRU is formatted as: + # [F00000000][T%FILETIME%][O00000000]*%FILENAME% + _RE_VALUE_DATA = re.compile(r'\[F00000000\]\[T([0-9A-Z]+)\].*\*[\\]?(.*)') + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect Values under Office 2010 MRUs and return events for each one. 
+ + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + # TODO: Test other Office versions to make sure this plugin is applicable. + for value in key.GetValues(): + # Ignore any value not in the form: 'Item [0-9]+'. + if not value.name or not self._RE_VALUE_NAME.search(value.name): + continue + + # Ignore any value that is empty or that does not contain a string. + if not value.data or not value.DataIsString(): + continue + + values = self._RE_VALUE_DATA.findall(value.data) + + # Values will contain a list containing a tuple containing 2 values. + if len(values) != 1 or len(values[0]) != 2: + continue + + try: + filetime = int(values[0][0], 16) + except ValueError: + logging.warning('Unable to convert filetime string to an integer.') + filetime = 0 + + # TODO: why this behavior? Only the first Item is stored with its + # timestamp. Shouldn't this be: Store all the Item # values with + # their timestamp and store the entire MRU as one event with the + # registry key last written time? + if value.name == 'Item 1': + timestamp = timelib.Timestamp.FromFiletime(filetime) + else: + timestamp = 0 + + text_dict = {} + text_dict[value.name] = value.data + + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, + source_append=': Microsoft Office MRU') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(OfficeMRUPlugin) diff --git a/plaso/parsers/winreg_plugins/officemru_test.py b/plaso/parsers/winreg_plugins/officemru_test.py new file mode 100644 index 0000000..7f7cb22 --- /dev/null +++ b/plaso/parsers/winreg_plugins/officemru_test.py @@ -0,0 +1,76 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
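+# Illustrative note (not part of the original import): the Office MRU value
+# format parsed above embeds a FILETIME as hex digits after the 'T' marker;
+# FILETIME counts 100-nanosecond ticks since 1601-01-01 UTC. A stand-alone
+# sketch of the decode, using a synthetic path and the timestamp exercised
+# by the test below:
+#
+#   import datetime
+#   import re
+#
+#   value_data = u'[F00000000][T01CD0146EA1EADB0][O00000000]*C:\\test.docx'
+#   match = re.match(
+#       r'\[F00000000\]\[T([0-9A-Z]+)\].*\*[\\]?(.*)', value_data)
+#   filetime = int(match.group(1), 16)
+#   date_time = (datetime.datetime(1601, 1, 1) +
+#                datetime.timedelta(microseconds=filetime // 10))
+#   # date_time is 2012-03-13 18:27:15.083..., as asserted below.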
+"""Tests for the Microsoft Office MRUs Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import officemru +from plaso.parsers.winreg_plugins import test_lib + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class OfficeMRUPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the Microsoft Office MRUs Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = officemru.OfficeMRUPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = u'\\Software\\Microsoft\\Office\\14.0\\Word\\File MRU' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 5) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-13 18:27:15.083') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'Item 1' + expected_value = ( + u'[F00000000][T01CD0146EA1EADB0][O00000000]*' + u'C:\\Users\\nfury\\Documents\\StarFury\\StarFury\\' + u'SA-23E Mitchell-Hyundyne Starfury.docx') + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = u'[{0:s}] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + expected_msg_short = u'[{0:s}] {1:s}: [F00000000][T01CD0146...'.format( + key_path, regvalue_identifier) + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/outlook.py b/plaso/parsers/winreg_plugins/outlook.py new file mode 100644 index 0000000..a4a2cf8 --- /dev/null +++ b/plaso/parsers/winreg_plugins/outlook.py @@ -0,0 +1,97 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains an Outlook Registry parser.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class OutlookSearchMRUPlugin(interface.KeyPlugin): + """Windows Registry plugin parsing Outlook Search MRU keys.""" + + NAME = 'winreg_outlook_mru' + DESCRIPTION = u'Parser for Microsoft Outlook search MRU Registry data.' + + REG_KEYS = [ + u'\\Software\\Microsoft\\Office\\15.0\\Outlook\\Search', + u'\\Software\\Microsoft\\Office\\14.0\\Outlook\\Search'] + + # TODO: The catalog for Office 2013 (15.0) contains binary values not + # dword values. Check if Office 2007 and 2010 have the same. Re-enable the + # plug-ins once confirmed and OutlookSearchMRUPlugin has been extended to + # handle the binary data or create a OutlookSearchCatalogMRUPlugin. + # Registry keys for: + # MS Outlook 2007 Search Catalog: + # '\\Software\\Microsoft\\Office\\12.0\\Outlook\\Catalog' + # MS Outlook 2010 Search Catalog: + # '\\Software\\Microsoft\\Office\\14.0\\Outlook\\Search\\Catalog' + # MS Outlook 2013 Search Catalog: + # '\\Software\\Microsoft\\Office\\15.0\\Outlook\\Search\\Catalog' + + REG_TYPE = 'NTUSER' + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect the values under Outlook and return event for each one. + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + """ + value_index = 0 + for value in key.GetValues(): + # Ignore the default value. + if not value.name: + continue + + # Ignore any value that is empty or that does not contain an integer. + if not value.data or not value.DataIsInteger(): + continue + + # TODO: change this 32-bit integer into something meaningful, for now + # the value name is the most interesting part. + text_dict = {} + text_dict[value.name] = '0x{0:08x}'.format(value.data) + + if value_index == 0: + timestamp = key.last_written_timestamp + else: + timestamp = 0 + + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, + source_append=': PST Paths') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + value_index += 1 + + +winreg.WinRegistryParser.RegisterPlugin(OutlookSearchMRUPlugin) diff --git a/plaso/parsers/winreg_plugins/outlook_test.py b/plaso/parsers/winreg_plugins/outlook_test.py new file mode 100644 index 0000000..374cdb6 --- /dev/null +++ b/plaso/parsers/winreg_plugins/outlook_test.py @@ -0,0 +1,103 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Outlook Windows Registry plugins.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.parsers.winreg_plugins import outlook +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import test_lib as winreg_test_lib + + +class MSOutlook2013SearchMRUPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the Outlook Search MRU Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = outlook.OutlookSearchMRUPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Software\\Microsoft\\Office\\15.0\\Outlook\\Search' + values = [] + + values.append(winreg_test_lib.TestRegValue( + ('C:\\Users\\username\\AppData\\Local\\Microsoft\\Outlook\\' + 'username@example.com.ost'), '\xcf\x2b\x37\x00', + winreg_test_lib.TestRegValue.REG_DWORD, offset=1892)) + + winreg_key = winreg_test_lib.TestRegKey( + key_path, 1346145829002031, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + expected_msg = ( + u'[{0:s}] ' + u'C:\\Users\\username\\AppData\\Local\\Microsoft\\Outlook\\' + u'username@example.com.ost: 0x00372bcf').format(key_path) + + expected_msg_short = u'[{0:s}] C:\\Users\\username\\AppData\\Lo...'.format( + key_path) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.timestamp, 1346145829002031) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +# TODO: The catalog for Office 2013 (15.0) contains binary values not +# dword values. Check if Office 2007 and 2010 have the same. Re-enable the +# plug-ins once confirmed and OutlookSearchMRUPlugin has been extended to +# handle the binary data or create a OutlookSearchCatalogMRUPlugin. + +# class MSOutlook2013SearchCatalogMRUPluginTest(unittest.TestCase): +# """Tests for the Outlook Search Catalog MRU Windows Registry plugin.""" +# +# def setUp(self): +# """Sets up the needed objects used throughout the test.""" +# self._plugin = outlook.MSOutlook2013SearchCatalogMRUPlugin() +# +# def testProcess(self): +# """Tests the Process function.""" +# key_path = ( +# u'\\Software\\Microsoft\\Office\\15.0\\Outlook\\Search\\Catalog') +# values = [] +# +# values.append(winreg_test_lib.TestRegValue( +# ('C:\\Users\\username\\AppData\\Local\\Microsoft\\Outlook\\' +# 'username@example.com.ost'), '\x94\x01\x00\x00\x00\x00', +# winreg_test_lib.TestRegValue.REG_BINARY, offset=827)) +# +# winreg_key = winreg_test_lib.TestRegKey( +# key_path, 1346145829002031, values, 3421) +# +# # TODO: add test for Catalog key. 
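+# Illustrative note (not part of the original import): the Search MRU test
+# above feeds the raw bytes '\xcf\x2b\x37\x00' and expects the rendered
+# value 0x00372bcf: the data is a little-endian 32-bit integer formatted
+# with '0x{0:08x}'. The key timestamp 1346145829002031 is microseconds
+# since 1970-01-01 UTC, i.e. 2012-08-28 09:23:49.002031. Stand-alone check
+# of the value decode:
+#
+#   import struct
+#
+#   value, = struct.unpack('<I', b'\xcf\x2b\x37\x00')
+#   assert '0x{0:08x}'.format(value) == '0x00372bcf'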
+ + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/run.py b/plaso/parsers/winreg_plugins/run.py new file mode 100644 index 0000000..3b3391a --- /dev/null +++ b/plaso/parsers/winreg_plugins/run.py @@ -0,0 +1,90 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Run/RunOnce Key plugins for Plaso.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class RunUserPlugin(interface.KeyPlugin): + """Windows Registry plugin for parsing user specific auto runs.""" + + NAME = 'winreg_run' + DESCRIPTION = u'Parser for run and run once Registry data.' + + REG_TYPE = 'NTUSER' + + REG_KEYS = [ + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Run', + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\RunOnce'] + + URLS = ['http://msdn.microsoft.com/en-us/library/aa376977(v=vs.85).aspx'] + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect the Values under the Run Key and return an event for each one. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + for value in key.GetValues(): + # Ignore the default value. + if not value.name: + continue + + # Ignore any value that is empty or that does not contain a string. 
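+      # (Run/RunOnce values are expected to be REG_SZ command lines keyed
+      # by the autorun entry name, so non-string data is skipped.)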
+ if not value.data or not value.DataIsString(): + continue + + text_dict = {} + text_dict[value.name] = value.data + + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, offset=key.offset, + urls=self.URLS, registry_type=registry_type, + source_append=': Run Key') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class RunSoftwarePlugin(RunUserPlugin): + """Windows Registry plugin for parsing system wide auto runs.""" + + NAME = 'winreg_run_software' + + REG_TYPE = 'SOFTWARE' + + REG_KEYS = [ + u'\\Microsoft\\Windows\\CurrentVersion\\Run', + u'\\Microsoft\\Windows\\CurrentVersion\\RunOnce', + u'\\Microsoft\\Windows\\CurrentVersion\\RunOnce\\Setup', + u'\\Microsoft\\Windows\\CurrentVersion\\RunServices', + u'\\Microsoft\\Windows\\CurrentVersion\\RunServicesOnce'] + + +winreg.WinRegistryParser.RegisterPlugins([ + RunUserPlugin, RunSoftwarePlugin]) diff --git a/plaso/parsers/winreg_plugins/run_test.py b/plaso/parsers/winreg_plugins/run_test.py new file mode 100644 index 0000000..ac91f1d --- /dev/null +++ b/plaso/parsers/winreg_plugins/run_test.py @@ -0,0 +1,179 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Run Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.parsers.winreg_plugins import run +from plaso.parsers.winreg_plugins import test_lib + + +class RunNtuserPlugintest(test_lib.RegistryPluginTestCase): + """Tests for the Run Windows Registry plugin on the User hive.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = run.RunUserPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-RunTests.DAT']) + key_path = u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Run' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
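+    # (The raw comparison value below, 1333645433992061, is microseconds
+    # since 1970-01-01 UTC: 2012-04-05T17:03:53 is epoch second 1333645433,
+    # times 1000000, plus the 992061 microsecond fraction.)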
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + # Timestamp is: 2012-04-05T17:03:53.992061+00:00 + self.assertEquals(event_object.timestamp, 1333645433992061) + + expected_msg = ( + u'[{0:s}] Sidebar: %ProgramFiles%\\Windows Sidebar\\Sidebar.exe ' + u'/autoRun').format(key_path) + expected_msg_short = ( + u'[{0:s}] Sidebar: %ProgramFiles%\\Wind...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class RunOnceNtuserPlugintest(test_lib.RegistryPluginTestCase): + """Tests for the RunOnce Windows Registry plugin on the User hive.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = run.RunUserPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-RunTests.DAT']) + key_path = u'\\Software\\Microsoft\\Windows\\CurrentVersion\\RunOnce' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + # Timestamp is: 2012-04-05T17:03:53.992061+00:00 + self.assertEquals(event_object.timestamp, 1333645433992061) + + expected_msg = ( + u'[{0:s}] mctadmin: C:\\Windows\\System32\\mctadmin.exe').format( + key_path) + expected_msg_short = ( + u'[{0:s}] mctadmin: C:\\Windows\\Sys...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class RunSoftwarePluginTest(test_lib.RegistryPluginTestCase): + """Tests for the Run Windows Registry plugin on the Software hive.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = run.RunSoftwarePlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['SOFTWARE-RunTests']) + key_path = u'\\Microsoft\\Windows\\CurrentVersion\\Run' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 3) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + # Timestamp is: 2011-09-16T20:57:09.067575+00:00 + self.assertEquals(event_object.timestamp, 1316206629067575) + + expected_msg = ( + u'[{0:s}] VMware Tools: \"C:\\Program Files\\VMware\\VMware Tools' + u'\\VMwareTray.exe\"').format(key_path) + expected_msg_short = ( + u'[{0:s}] VMware Tools: \"C:\\Program Files\\VMwar...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + self.assertEquals(event_objects[1].timestamp, 1316206629067575) + + +class RunOnceSoftwarePluginTest(test_lib.RegistryPluginTestCase): + """Tests for the RunOnce Windows Registry plugin on the Software hive.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = run.RunSoftwarePlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['SOFTWARE-RunTests']) + key_path = u'\\Microsoft\\Windows\\CurrentVersion\\RunOnce' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + # Timestamp is: 2012-04-06T14:07:27.750000+00:00 + self.assertEquals(event_object.timestamp, 1333721247750000) + + expected_msg = ( + u'[{0:s}] *WerKernelReporting: %SYSTEMROOT%\\SYSTEM32\\WerFault.exe ' + u'-k -rq').format(key_path) + expected_msg_short = ( + u'[{0:s}] *WerKernelReporting: %SYSTEMROOT%...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/sam_users.py b/plaso/parsers/winreg_plugins/sam_users.py new file mode 100644 index 0000000..60a9d3d --- /dev/null +++ b/plaso/parsers/winreg_plugins/sam_users.py @@ -0,0 +1,191 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors.# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the SAM Users & Names key plugin.""" + +import construct +import logging +from plaso.events import windows_events +from plaso.lib import binary +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'Preston Miller, dpmforensics.com, github.com/prmiller91' + + +class UsersPlugin(interface.KeyPlugin): + """SAM Windows Registry plugin for Users Account information.""" + + NAME = 'winreg_sam_users' + DESCRIPTION = u'Parser for SAM Users and Names Registry keys.' + + REG_KEYS = [u'\\SAM\\Domains\\Account\\Users'] + REG_TYPE = 'SAM' + F_VALUE_STRUCT = construct.Struct( + 'f_struct', construct.Padding(8), construct.ULInt64('last_login'), + construct.Padding(8), construct.ULInt64('password_reset'), + construct.Padding(16), construct.ULInt16('rid'), construct.Padding(16), + construct.ULInt8('login_count')) + V_VALUE_HEADER = construct.Struct( + 'v_header', construct.Array(11, construct.ULInt32('values'))) + V_VALUE_HEADER_SIZE = 0xCC + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect data from Users and Names and produce event objects. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + + name_dict = {} + + name_key = key.GetSubkey('Names') + if not name_key: + logging.error(u'Unable to locate Names key.') + return + values = [(v.name, v.last_written_timestamp) for v in name_key.GetSubkeys()] + name_dict = dict(values) + + for subkey in key.GetSubkeys(): + text_dict = {} + if subkey.name == 'Names': + continue + text_dict['user_guid'] = subkey.name + parsed_v_value = self._ParseVValue(subkey) + if not parsed_v_value: + logging.error(u'V Value was not succesfully parsed by ParseVValue.') + return + username = parsed_v_value[0] + full_name = parsed_v_value[1] + comments = parsed_v_value[2] + if username: + text_dict['username'] = username + if full_name: + text_dict['full_name'] = full_name + if comments: + text_dict['comments'] = comments + if name_dict: + account_create_time = name_dict.get(text_dict.get('username'), 0) + else: + account_create_time = 0 + + f_data = self._ParseFValue(subkey) + last_login_time = timelib.Timestamp.FromFiletime(f_data.last_login) + password_reset_time = timelib.Timestamp.FromFiletime( + f_data.password_reset) + text_dict['account_rid'] = f_data.rid + text_dict['login_count'] = f_data.login_count + + if account_create_time > 0: + event_object = windows_events.WindowsRegistryEvent( + account_create_time, key.path, text_dict, + usage=eventdata.EventTimestamp.ACCOUNT_CREATED, + offset=key.offset, registry_type=registry_type, + source_append=u'User Account Information') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if last_login_time > 0: + event_object = windows_events.WindowsRegistryEvent( + last_login_time, key.path, text_dict, + usage=eventdata.EventTimestamp.LAST_LOGIN_TIME, + offset=key.offset, + registry_type=registry_type, + source_append=u'User Account Information') + parser_context.ProduceEvent( + 
event_object, parser_chain=parser_chain, file_entry=file_entry) + + if password_reset_time > 0: + event_object = windows_events.WindowsRegistryEvent( + password_reset_time, key.path, text_dict, + usage=eventdata.EventTimestamp.LAST_PASSWORD_RESET, + offset=key.offset, registry_type=registry_type, + source_append=u'User Account Information') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + def _ParseVValue(self, key): + """Parses V value and returns name, fullname, and comments data. + + Args: + key: Registry key (instance of winreg.WinRegKey). + + Returns: + name: Name data parsed with name start and length values. + fullname: Fullname data parsed with fullname start and length values. + comments: Comments data parsed with comments start and length values. + """ + + v_value = key.GetValue('V') + if not v_value: + logging.error(u'Unable to locate V Value in key.') + return + try: + structure = self.V_VALUE_HEADER.parse(v_value.data) + except construct.FieldError as exception: + logging.error( + u'Unable to extract V value header data: {:s}'.format(exception)) + return + name_offset = structure.values()[0][3] + self.V_VALUE_HEADER_SIZE + full_name_offset = structure.values()[0][6] + self.V_VALUE_HEADER_SIZE + comments_offset = structure.values()[0][9] + self.V_VALUE_HEADER_SIZE + name_raw = v_value.data[ + name_offset:name_offset + structure.values()[0][4]] + full_name_raw = v_value.data[ + full_name_offset:full_name_offset + structure.values()[0][7]] + comments_raw = v_value.data[ + comments_offset:comments_offset + structure.values()[0][10]] + name = binary.ReadUtf16(name_raw) + full_name = binary.ReadUtf16(full_name_raw) + comments = binary.ReadUtf16(comments_raw) + return name, full_name, comments + + def _ParseFValue(self, key): + """Parses F value and returns parsed F data construct object. + + Args: + key: Registry key (instance of winreg.WinRegKey). + + Returns: + f_data: Construct parsed F value containing rid, login count, + and timestamp information. + """ + f_value = key.GetValue('F') + if not f_value: + logging.error(u'Unable to locate F Value in key.') + return + try: + f_data = self.F_VALUE_STRUCT.parse(f_value.data) + except construct.FieldError as exception: + logging.error( + u'Unable to extract F value data: {:s}'.format(exception)) + return + return f_data + + +winreg.WinRegistryParser.RegisterPlugin(UsersPlugin) diff --git a/plaso/parsers/winreg_plugins/sam_users_test.py b/plaso/parsers/winreg_plugins/sam_users_test.py new file mode 100644 index 0000000..25810c2 --- /dev/null +++ b/plaso/parsers/winreg_plugins/sam_users_test.py @@ -0,0 +1,80 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
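+# Illustrative note (not part of the original import): the F value layout
+# parsed by F_VALUE_STRUCT above is, in plain-struct terms: 8 pad bytes,
+# uint64 last_login, 8 pad, uint64 password_reset, 16 pad, uint16 rid,
+# 16 pad, uint8 login_count (all little-endian). The V value similarly
+# starts with eleven uint32s holding offsets and lengths, relative to
+# V_VALUE_HEADER_SIZE (0xCC), for the username, full name and comments
+# strings. A stand-alone decode of a synthetic F value:
+#
+#   import struct
+#
+#   pad = b'\x00'
+#   f_data = (pad * 8 + struct.pack('<Q', 1) + pad * 8 +
+#             struct.pack('<Q', 2) + pad * 16 + struct.pack('<H', 500) +
+#             pad * 16 + struct.pack('<B', 6))
+#   last_login, = struct.unpack_from('<Q', f_data, 8)
+#   password_reset, = struct.unpack_from('<Q', f_data, 24)
+#   rid, = struct.unpack_from('<H', f_data, 48)          # -> 500
+#   login_count, = struct.unpack_from('<B', f_data, 66)  # -> 6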
+"""Tests for the Users key plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import event +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import sam_users + + +__author__ = 'Preston Miller, dpmforensics.com, github.com/prmiller91' + + +class UsersPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the SAM Users key plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = sam_users.UsersPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['SAM']) + key_path = u'\\SAM\\Domains\\Account\\Users' + winreg_key = self._GetKeyFromFile(test_file, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 7) + + event_object = event_objects[0] + + self._TestRegvalue(event_object, u'account_rid', 500) + self._TestRegvalue(event_object, u'login_count', 6) + self._TestRegvalue(event_object, u'user_guid', u'000001F4') + self._TestRegvalue(event_object, u'username', u'Administrator') + + expected_msg = ( + u'[\\SAM\\Domains\\Account\\Users] ' + u'account_rid: 500 ' + u'comments: Built-in account for administering the computer/domain ' + u'login_count: 6 ' + u'user_guid: 000001F4 ' + u'username: Administrator') + + # Match UTC timestamp. + time = long(timelib_test.CopyStringToTimestamp( + u'2014-09-24 03:36:06.358837')) + self.assertEquals(event_object.timestamp, time) + + expected_msg_short = ( + u'[\\SAM\\Domains\\Account\\Users] ' + u'account_rid: 500 ' + u'comments: Built-in account for ...') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/services.py b/plaso/parsers/winreg_plugins/services.py new file mode 100644 index 0000000..975df8f --- /dev/null +++ b/plaso/parsers/winreg_plugins/services.py @@ -0,0 +1,98 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plug-in to format the Services and Drivers key with Start and Type values.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class ServicesPlugin(interface.ValuePlugin): + """Plug-in to format the Services and Drivers keys having Type and Start.""" + + NAME = 'winreg_services' + DESCRIPTION = u'Parser for services and drivers Registry data.' + + REG_VALUES = frozenset(['Type', 'Start']) + REG_TYPE = 'SYSTEM' + URLS = ['http://support.microsoft.com/kb/103000'] + + + def GetServiceDll(self, key): + """Get the Service DLL for a service, if it exists. 
+
+    Checks for a ServiceDll value in the Parameters subkey of a service key
+    in the Registry.
+
+    Args:
+      key: A Windows Registry key (instance of WinRegKey).
+
+    Returns:
+      The ServiceDll value data if present, None otherwise.
+    """
+    parameters_key = key.GetSubkey('Parameters')
+    if not parameters_key:
+      return None
+
+    service_dll = parameters_key.GetValue('ServiceDll')
+    if not service_dll:
+      return None
+
+    return service_dll.data
+
+  def GetEntries(
+      self, parser_context, key=None, registry_type=None, file_entry=None,
+      parser_chain=None, **unused_kwargs):
+    """Create one event for each subkey under Services that has Type and Start.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: Optional Registry key (instance of winreg.WinRegKey).
+           The default is None.
+      registry_type: Optional Registry type string. The default is None.
+      file_entry: Optional file entry object (instance of dfvfs.FileEntry).
+        The default is None.
+      parser_chain: Optional string containing the parsing chain up to this
+        point. The default is None.
+    """
+    text_dict = {}
+
+    service_type_value = key.GetValue('Type')
+    service_start_value = key.GetValue('Start')
+
+    # Grab the ServiceDll value if it exists.
+    if service_type_value and service_start_value:
+      service_dll = self.GetServiceDll(key)
+      if service_dll:
+        text_dict['ServiceDll'] = service_dll
+
+    # Gather all the other string and integer values and insert as they are.
+    for value in key.GetValues():
+      if not value.name:
+        continue
+      if value.name not in text_dict:
+        if value.DataIsString() or value.DataIsInteger():
+          text_dict[value.name] = value.data
+        elif value.DataIsMultiString():
+          text_dict[value.name] = u', '.join(value.data)
+
+    # Create a specific service event, so that we can recognize and expand
+    # certain values when we're outputting the event.
+    event_object = windows_events.WindowsRegistryServiceEvent(
+        key.last_written_timestamp, key.path, text_dict, offset=key.offset,
+        registry_type=registry_type, urls=self.URLS)
+    parser_context.ProduceEvent(
+        event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+winreg.WinRegistryParser.RegisterPlugin(ServicesPlugin)
diff --git a/plaso/parsers/winreg_plugins/services_test.py b/plaso/parsers/winreg_plugins/services_test.py
new file mode 100644
index 0000000..202772f
--- /dev/null
+++ b/plaso/parsers/winreg_plugins/services_test.py
@@ -0,0 +1,170 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
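+# Illustrative note (not part of the original import): the virtual key in
+# the first test below encodes value data the way the Registry stores it:
+# REG_SZ payloads are UTF-16-LE encoded strings and REG_DWORD payloads are
+# four little-endian bytes, e.g.:
+#
+#   'Pnp Filter'.encode('utf_16_le')   # REG_SZ data
+#   b'\x02\x00\x00\x00'                # REG_DWORD data for the value 2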
+"""This file contains tests for Services Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import services +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import test_lib as winreg_test_lib + + +class ServicesRegistryPluginTest(test_lib.RegistryPluginTestCase): + """The unit test for Services Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = services.ServicesPlugin() + + def testProcess(self): + """Tests the Process function on a virtual key.""" + key_path = u'\\ControlSet001\\services\\TestDriver' + + values = [] + values.append(winreg_test_lib.TestRegValue( + 'Type', '\x02\x00\x00\x00', 4, 123)) + values.append(winreg_test_lib.TestRegValue( + 'Start', '\x02\x00\x00\x00', 4, 127)) + values.append(winreg_test_lib.TestRegValue( + 'ErrorControl', '\x01\x00\x00\x00', 4, 131)) + values.append(winreg_test_lib.TestRegValue( + 'Group', 'Pnp Filter'.encode('utf_16_le'), 1, 140)) + values.append(winreg_test_lib.TestRegValue( + 'DisplayName', 'Test Driver'.encode('utf_16_le'), 1, 160)) + values.append(winreg_test_lib.TestRegValue( + 'DriverPackageId', + 'testdriver.inf_x86_neutral_dd39b6b0a45226c4'.encode('utf_16_le'), 1, + 180)) + values.append(winreg_test_lib.TestRegValue( + 'ImagePath', 'C:\\Dell\\testdriver.sys'.encode('utf_16_le'), 1, 200)) + + timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + winreg_key = winreg_test_lib.TestRegKey( + key_path, timestamp, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = ( + u'[{0:s}] ' + u'DisplayName: Test Driver ' + u'DriverPackageId: testdriver.inf_x86_neutral_dd39b6b0a45226c4 ' + u'ErrorControl: Normal (1) ' + u'Group: Pnp Filter ' + u'ImagePath: C:\\Dell\\testdriver.sys ' + u'Start: Auto Start (2) ' + u'Type: File System Driver (0x2)').format(key_path) + expected_msg_short = ( + u'[{0:s}] ' + u'DisplayName: Test Driver ' + u'DriverPackageId...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testProcessFile(self): + """Tests the Process function on a key in a file.""" + test_file_entry = self._GetTestFileEntryFromPath(['SYSTEM']) + key_path = u'\\ControlSet001\\services' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + + event_objects = [] + + # Select a few service subkeys to perform additional testing. 
+ bits_event_objects = None + mc_task_manager_event_objects = None + rdp_video_miniport_event_objects = None + + for winreg_subkey in winreg_key.GetSubkeys(): + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_subkey, file_entry=test_file_entry) + sub_event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + event_objects.extend(sub_event_objects) + + if winreg_subkey.name == 'BITS': + bits_event_objects = sub_event_objects + elif winreg_subkey.name == 'McTaskManager': + mc_task_manager_event_objects = sub_event_objects + elif winreg_subkey.name == 'RdpVideoMiniport': + rdp_video_miniport_event_objects = sub_event_objects + + self.assertEquals(len(event_objects), 416) + + # Test the BITS subkey event objects. + self.assertEquals(len(bits_event_objects), 1) + + event_object = bits_event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-04-06 20:43:27.639075') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self._TestRegvalue(event_object, u'Type', 0x20) + self._TestRegvalue(event_object, u'Start', 3) + self._TestRegvalue( + event_object, u'ServiceDll', u'%SystemRoot%\\System32\\qmgr.dll') + + # Test the McTaskManager subkey event objects. + self.assertEquals(len(mc_task_manager_event_objects), 1) + + event_object = mc_task_manager_event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-09-16 20:49:16.877415') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self._TestRegvalue(event_object, u'DisplayName', u'McAfee Task Manager') + self._TestRegvalue(event_object, u'Type', 0x10) + + # Test the RdpVideoMiniport subkey event objects. + self.assertEquals(len(rdp_video_miniport_event_objects), 1) + + event_object = rdp_video_miniport_event_objects[0] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-09-17 13:37:59.347157') + self.assertEquals(event_object.timestamp, expected_timestamp) + + self._TestRegvalue(event_object, u'Start', 3) + expected_value = u'System32\\drivers\\rdpvideominiport.sys' + self._TestRegvalue(event_object, u'ImagePath', expected_value) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/shutdown.py b/plaso/parsers/winreg_plugins/shutdown.py new file mode 100644 index 0000000..26ce50e --- /dev/null +++ b/plaso/parsers/winreg_plugins/shutdown.py @@ -0,0 +1,78 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors.# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
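+# Illustrative note (not part of the original import): the human-readable
+# strings asserted in the services tests above ('Start: Auto Start (2)',
+# 'Type: File System Driver (0x2)', 'ErrorControl: Normal (1)') are
+# produced at output time by expanding well-known Services values. The
+# lookup shape, with only an entry exercised by those tests (not the
+# formatter's full tables):
+#
+#   SERVICE_START = {2: u'Auto Start'}
+#
+#   def FormatStartValue(value):
+#     return u'{0:s} ({1:d})'.format(
+#         SERVICE_START.get(value, u'UNKNOWN'), value)
+#
+#   assert FormatStartValue(2) == u'Auto Start (2)'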
+"""This file contains the LastShutdown value plugin.""" + +import construct +import logging +from plaso.events import windows_events +from plaso.lib import eventdata +from plaso.lib import timelib +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'Preston Miller, dpmforensics.com, github.com/prmiller91' + + +class ShutdownPlugin(interface.KeyPlugin): + """Windows Registry plugin for parsing the last shutdown time of a system.""" + + NAME = 'winreg_shutdown' + DESCRIPTION = u'Parser for ShutdownTime Registry value.' + + REG_KEYS = [u'\\{current_control_set}\\Control\\Windows'] + REG_TYPE = 'SYSTEM' + FILETIME_STRUCT = construct.ULInt64('filetime_timestamp') + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect ShutdownTime value under Windows and produce an event object. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + shutdown_value = key.GetValue('ShutdownTime') + if not shutdown_value: + return + text_dict = {} + text_dict['Description'] = shutdown_value.name + try: + filetime = self.FILETIME_STRUCT.parse(shutdown_value.data) + except construct.FieldError as exception: + logging.error( + u'Unable to extract shutdown timestamp: {0:s}'.format(exception)) + return + timestamp = timelib.Timestamp.FromFiletime(filetime) + + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, + usage=eventdata.EventTimestamp.LAST_SHUTDOWN, offset=key.offset, + registry_type=registry_type, + source_append=u'Shutdown Entry') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(ShutdownPlugin) diff --git a/plaso/parsers/winreg_plugins/shutdown_test.py b/plaso/parsers/winreg_plugins/shutdown_test.py new file mode 100644 index 0000000..21a1c21 --- /dev/null +++ b/plaso/parsers/winreg_plugins/shutdown_test.py @@ -0,0 +1,79 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the LastShutdown value plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import shutdown + + +__author__ = 'Preston Miller, dpmforensics.com, github.com/prmiller91' + + +class ShutdownPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the LastShutdown value plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = shutdown.ShutdownPlugin() + + def testProcess(self): + """Tests the Process function.""" + knowledge_base_values = {'current_control_set': u'ControlSet001'} + test_file_entry = self._GetTestFileEntryFromPath(['SYSTEM']) + key_path = u'\\ControlSet001\\Control\\Windows' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, knowledge_base_values=knowledge_base_values, + file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_value = u'ShutdownTime' + self._TestRegvalue(event_object, u'Description', expected_value) + + expected_msg = ( + u'[\\ControlSet001\\Control\\Windows] ' + u'Description: ShutdownTime') + + # Match UTC timestamp. + time = long(timelib_test.CopyStringToTimestamp( + u'2012-04-04 01:58:40.839249')) + self.assertEquals(event_object.timestamp, time) + + expected_msg_short = ( + u'[\\ControlSet001\\Control\\Windows] ' + u'Description: ShutdownTime') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/task_scheduler.py b/plaso/parsers/winreg_plugins/task_scheduler.py new file mode 100644 index 0000000..4506e2f --- /dev/null +++ b/plaso/parsers/winreg_plugins/task_scheduler.py @@ -0,0 +1,169 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the Task Scheduler Registry keys plugins.""" + +import logging + +import construct + +from plaso.events import windows_events +from plaso.events import time_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class TaskCacheEvent(time_events.FiletimeEvent): + """Convenience class for a Task Cache event.""" + + DATA_TYPE = 'task_scheduler:task_cache:entry' + + def __init__( + self, timestamp, timestamp_description, task_name, task_identifier): + """Initializes the event. + + Args: + timestamp: The FILETIME value for the timestamp. + timestamp_description: The usage string for the timestamp value. + task_name: String containing the name of the task. + task_identifier: String containing the identifier of the task. + """ + super(TaskCacheEvent, self).__init__(timestamp, timestamp_description) + + self.offset = 0 + self.task_name = task_name + self.task_identifier = task_identifier + + +class TaskCachePlugin(interface.KeyPlugin): + """Plugin that parses a Task Cache key.""" + + NAME = 'winreg_task_cache' + DESCRIPTION = u'Parser for Task Scheduler cache Registry data.' + + REG_TYPE = 'SOFTWARE' + REG_KEYS = [ + u'\\Microsoft\\Windows NT\\CurrentVersion\\Schedule\\TaskCache'] + + URL = [ + u'https://code.google.com/p/winreg-kb/wiki/TaskSchedulerKeys'] + + _DYNAMIC_INFO_STRUCT = construct.Struct( + 'dynamic_info_record', + construct.ULInt32('version'), + construct.ULInt64('last_registered_time'), + construct.ULInt64('launch_time'), + construct.Padding(8)) + + _DYNAMIC_INFO_STRUCT_SIZE = _DYNAMIC_INFO_STRUCT.sizeof() + + def _GetIdValue(self, key): + """Retrieves the Id value from Task Cache Tree key. + + Args: + key: A Windows Registry key (instance of WinRegKey). + + Yields: + A tuple containing a Windows Registry Key (instance of WinRegKey) and + a Windows Registry value (instance of WinRegValue). + """ + id_value = key.GetValue(u'Id') + if id_value: + yield key, id_value + + for sub_key in key.GetSubkeys(): + for value_key, id_value in self._GetIdValue(sub_key): + yield value_key, id_value + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Parses a Task Cache Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + tasks_key = key.GetSubkey(u'Tasks') + tree_key = key.GetSubkey(u'Tree') + + if not tasks_key or not tree_key: + logging.warning(u'Task Cache is missing a Tasks or Tree sub key.') + return + + task_guids = {} + for sub_key in tree_key.GetSubkeys(): + for value_key, id_value in self._GetIdValue(sub_key): + # The GUID is in the form {%GUID%} and stored an UTF-16 little-endian + # string and should be 78 bytes in size. 
+ if len(id_value.raw_data) != 78: + logging.warning( + u'[{0:s}] unsupported Id value data size.'.format(self.NAME)) + continue + task_guids[id_value.data] = value_key.name + + for sub_key in tasks_key.GetSubkeys(): + dynamic_info_value = sub_key.GetValue(u'DynamicInfo') + if not dynamic_info_value: + continue + + if len(dynamic_info_value.raw_data) != self._DYNAMIC_INFO_STRUCT_SIZE: + logging.warning( + u'[{0:s}] unsupported DynamicInfo value data size.'.format( + self.NAME)) + continue + + dynamic_info = self._DYNAMIC_INFO_STRUCT.parse( + dynamic_info_value.raw_data) + + name = task_guids.get(sub_key.name, sub_key.name) + + text_dict = {} + text_dict[u'Task: {0:s}'.format(name)] = u'[ID: {0:s}]'.format( + sub_key.name) + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if dynamic_info.last_registered_time: + # Note this is likely either the last registered time or + # the update time. + event_object = TaskCacheEvent( + dynamic_info.last_registered_time, u'Last registered time', name, + sub_key.name) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + if dynamic_info.launch_time: + # Note this is likely the launch time. + event_object = TaskCacheEvent( + dynamic_info.launch_time, u'Launch time', name, sub_key.name) + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + # TODO: Add support for the Triggers value. + + +winreg.WinRegistryParser.RegisterPlugin(TaskCachePlugin) diff --git a/plaso/parsers/winreg_plugins/task_scheduler_test.py b/plaso/parsers/winreg_plugins/task_scheduler_test.py new file mode 100644 index 0000000..e109822 --- /dev/null +++ b/plaso/parsers/winreg_plugins/task_scheduler_test.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
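The DynamicInfo records parsed above are fixed 28-byte blobs: a 32-bit version,
two 64-bit FILETIMEs and 8 bytes of padding. A standalone sketch of parsing one
with the construct 2.x API the plugin uses, on synthetic input:

    import construct

    DYNAMIC_INFO = construct.Struct(
        'dynamic_info_record',
        construct.ULInt32('version'),
        construct.ULInt64('last_registered_time'),
        construct.ULInt64('launch_time'),
        construct.Padding(8))

    # Synthetic record: version 1, both FILETIME values zero.
    record = DYNAMIC_INFO.parse(b'\x01\x00\x00\x00' + b'\x00' * 24)
    assert DYNAMIC_INFO.sizeof() == 28
    assert record.version == 1
    assert record.launch_time == 0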
+"""Tests for the Task Scheduler Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import task_scheduler +from plaso.parsers.winreg_plugins import test_lib + + +class TaskCachePluginTest(test_lib.RegistryPluginTestCase): + """Tests for the Task Cache key Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = task_scheduler.TaskCachePlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file = self._GetTestFilePath(['SOFTWARE-RunTests']) + key_path = ( + u'\\Microsoft\\Windows NT\\CurrentVersion\\Schedule\\TaskCache') + winreg_key = self._GetKeyFromFile(test_file, key_path) + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 174) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-07-14 04:53:25.811618') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'Task: SynchronizeTime' + expected_value = u'[ID: {044A6734-E90E-4F8F-B357-B2DC8AB3B5EC}]' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = u'[{0:s}] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + + expected_msg_short = u'[{0:s}] Task: SynchronizeTi...'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[1] + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-07-14 05:08:50.811626') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'Task: SynchronizeTime' + + expected_msg = ( + u'Task: SynchronizeTime ' + u'[Identifier: {044A6734-E90E-4F8F-B357-B2DC8AB3B5EC}]') + + expected_msg_short = ( + u'Task: SynchronizeTime') + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/terminal_server.py b/plaso/parsers/winreg_plugins/terminal_server.py new file mode 100644 index 0000000..66afdb6 --- /dev/null +++ b/plaso/parsers/winreg_plugins/terminal_server.py @@ -0,0 +1,127 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the Terminal Server Registry plugins.""" + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class TerminalServerClientPlugin(interface.KeyPlugin): + """Windows Registry plugin for Terminal Server Client Connection keys.""" + + NAME = 'winreg_rdp' + DESCRIPTION = u'Parser for Terminal Server Client Connection Registry data.' + + REG_TYPE = 'NTUSER' + REG_KEYS = [ + u'\\Software\\Microsoft\\Terminal Server Client\\Servers', + u'\\Software\\Microsoft\\Terminal Server Client\\Default\\AddIns\\RDPDR'] + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect Values in Servers and return event for each one. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + for subkey in key.GetSubkeys(): + username_value = subkey.GetValue('UsernameHint') + + if (username_value and username_value.data and + username_value.DataIsString()): + username = username_value.data + else: + username = u'None' + + text_dict = {} + text_dict['UsernameHint'] = username + + event_object = windows_events.WindowsRegistryEvent( + key.last_written_timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, + source_append=': RDP Connection') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +class TerminalServerClientMRUPlugin(interface.KeyPlugin): + """Windows Registry plugin for Terminal Server Client Connection MRUs keys.""" + + NAME = 'winreg_rdp_mru' + DESCRIPTION = u'Parser for Terminal Server Client MRU Registry data.' + + REG_TYPE = 'NTUSER' + REG_KEYS = [ + u'\\Software\\Microsoft\\Terminal Server Client\\Default', + u'\\Software\\Microsoft\\Terminal Server Client\\LocalDevices'] + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect MRU Values and return event for each one. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + """ + for value in key.GetValues(): + # TODO: add a check for the value naming scheme. + # Ignore the default value. + if not value.name: + continue + + # Ignore any value that is empty or that does not contain a string. 
+ if not value.data or not value.DataIsString(): + continue + + text_dict = {} + text_dict[value.name] = value.data + + if value.name == 'MRU0': + timestamp = key.last_written_timestamp + else: + timestamp = 0 + + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, + source_append=u': RDP Connection') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugins([ + TerminalServerClientPlugin, TerminalServerClientMRUPlugin]) diff --git a/plaso/parsers/winreg_plugins/terminal_server_test.py b/plaso/parsers/winreg_plugins/terminal_server_test.py new file mode 100644 index 0000000..c70c72e --- /dev/null +++ b/plaso/parsers/winreg_plugins/terminal_server_test.py @@ -0,0 +1,130 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Terminal Server Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import terminal_server +from plaso.parsers.winreg_plugins import test_lib +from plaso.winreg import test_lib as winreg_test_lib + + +class ServersTerminalServerClientPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the Terminal Server Client Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = terminal_server.TerminalServerClientPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Software\\Microsoft\\Terminal Server Client\\Servers' + values = [] + + values.append(winreg_test_lib.TestRegValue( + 'UsernameHint', 'DOMAIN\\username'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=1892)) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + + server_key_path = ( + u'\\Software\\Microsoft\\Terminal Server Client\\Servers\\myserver.com') + server_key = winreg_test_lib.TestRegKey( + server_key_path, expected_timestamp, values, offset=1456) + + winreg_key = winreg_test_lib.TestRegKey( + key_path, expected_timestamp, None, offset=865, subkeys=[server_key]) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = u'[{0:s}] UsernameHint: DOMAIN\\username'.format(key_path) + expected_msg_short = ( + u'[{0:s}] UsernameHint: DOMAIN\\use...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +class DefaultTerminalServerClientMRUPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the Terminal Server Client MRU Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = terminal_server.TerminalServerClientMRUPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Software\\Microsoft\\Terminal Server Client\\Default' + values = [] + + values.append(winreg_test_lib.TestRegValue( + 'MRU0', '192.168.16.60'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=1892)) + values.append(winreg_test_lib.TestRegValue( + 'MRU1', 'computer.domain.com'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, 612)) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + winreg_key = winreg_test_lib.TestRegKey( + key_path, expected_timestamp, values, 1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_msg = u'[{0:s}] MRU0: 192.168.16.60'.format(key_path) + expected_msg_short = u'[{0:s}] MRU0: 192.168.16.60'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + event_object = event_objects[1] + + self.assertEquals(event_object.timestamp, 0) + + expected_msg = u'[{0:s}] MRU1: computer.domain.com'.format(key_path) + expected_msg_short = u'[{0:s}] MRU1: computer.domain.com'.format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/test_lib.py b/plaso/parsers/winreg_plugins/test_lib.py new file mode 100644 index 0000000..17dc9f7 --- /dev/null +++ b/plaso/parsers/winreg_plugins/test_lib.py @@ -0,0 +1,106 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Windows Registry plugin related functions and classes for testing.""" + +from dfvfs.lib import definitions +from dfvfs.path import factory as path_spec_factory +from dfvfs.resolver import resolver as path_spec_resolver + +from plaso.engine import single_process +from plaso.parsers import test_lib +from plaso.winreg import winregistry + + +class RegistryPluginTestCase(test_lib.ParserTestCase): + """The unit test case for a Windows Registry plugin.""" + + def _GetKeyFromFile(self, path, key_path): + """Retrieves a Windows Registry key from a file. + + Args: + path: The path to the file, as a string. + key_path: The path of the key to parse. + + Returns: + A Windows Registry key (instance of WinRegKey). + """ + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=path) + file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec) + return self._GetKeyFromFileEntry(file_entry, key_path) + + def _GetKeyFromFileEntry(self, file_entry, key_path): + """Retrieves a Windows Registry key from a file. + + Args: + file_entry: A dfVFS file_entry object that references a test file. + key_path: The path of the key to parse. + + Returns: + A Windows Registry key (instance of WinRegKey). + """ + registry = winregistry.WinRegistry(winregistry.WinRegistry.BACKEND_PYREGF) + winreg_file = registry.OpenFile(file_entry, codepage='cp1252') + return winreg_file.GetKeyByPath(key_path) + + def _ParseKeyWithPlugin( + self, plugin_object, winreg_key, knowledge_base_values=None, + file_entry=None, parser_chain=None): + """Parses a key within a Windows Registry file using the plugin object. + + Args: + plugin_object: The plugin object. + winreg_key: The Windows Registry Key. + knowledge_base_values: Optional dict containing the knowledge base + values. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + + Returns: + An event object queue consumer object (instance of + TestEventObjectQueueConsumer). + """ + self.assertNotEquals(winreg_key, None) + + event_queue = single_process.SingleProcessQueue() + event_queue_consumer = test_lib.TestEventObjectQueueConsumer(event_queue) + + parse_error_queue = single_process.SingleProcessQueue() + + parser_context = self._GetParserContext( + event_queue, parse_error_queue, + knowledge_base_values=knowledge_base_values) + plugin_object.Process( + parser_context, key=winreg_key, parser_chain=parser_chain, + file_entry=file_entry) + + return event_queue_consumer + + def _TestRegvalue(self, event_object, identifier, expected_value): + """Tests a specific 'regvalue' attribute within the event object. + + Args: + event_object: the event object (instance of EventObject). + identifier: the identifier of the 'regvalue' attribute. + expected_value: the expected value of the 'regvalue' attribute. + """ + self.assertTrue(hasattr(event_object, 'regvalue')) + self.assertIn(identifier, event_object.regvalue) + self.assertEquals(event_object.regvalue[identifier], expected_value) diff --git a/plaso/parsers/winreg_plugins/typedurls.py b/plaso/parsers/winreg_plugins/typedurls.py new file mode 100644 index 0000000..dafd8bf --- /dev/null +++ b/plaso/parsers/winreg_plugins/typedurls.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. 
+# Please see the AUTHORS file for details on individual authors.# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the typed URLs plugins for Plaso.""" + +import re + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class TypedURLsPlugin(interface.KeyPlugin): + """A Windows Registry plugin for typed URLs history.""" + + NAME = 'winreg_typed_urls' + DESCRIPTION = u'Parser for Internet Explorer typed URLs Registry data.' + + REG_TYPE = 'NTUSER' + REG_KEYS = [ + u'\\Software\\Microsoft\\Internet Explorer\\TypedURLs', + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\TypedPaths'] + + _RE_VALUE_NAME = re.compile(r'^url[0-9]+$', re.I) + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect typed URLs values. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + registry_type: Optional Registry type string. The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + for value in key.GetValues(): + # Ignore any value not in the form: 'url[0-9]+'. + if not value.name or not self._RE_VALUE_NAME.search(value.name): + continue + + # Ignore any value that is empty or that does not contain a string. + if not value.data or not value.DataIsString(): + continue + + # TODO: shouldn't this behavior be, put all the typed urls + # into a single event object with the last written time of the key? + if value.name == 'url1': + timestamp = key.last_written_timestamp + else: + timestamp = 0 + + text_dict = {} + text_dict[value.name] = value.data + + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, + source_append=u': Typed URLs') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(TypedURLsPlugin) diff --git a/plaso/parsers/winreg_plugins/typedurls_test.py b/plaso/parsers/winreg_plugins/typedurls_test.py new file mode 100644 index 0000000..bfbefec --- /dev/null +++ b/plaso/parsers/winreg_plugins/typedurls_test.py @@ -0,0 +1,112 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the MSIE typed URLs Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import typedurls + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class MsieTypedURLsPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the MSIE typed URLs Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = typedurls.TypedURLsPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = u'\\Software\\Microsoft\\Internet Explorer\\TypedURLs' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 13) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-03-12 21:23:53.307749') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'url1' + expected_value = u'http://cnn.com/' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_string = u'[{0:s}] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +class TypedPathsPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the typed paths Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = typedurls.TypedURLsPlugin() + + def testProcess(self): + """Tests the Process function.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\TypedPaths') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-11-10 07:58:15.811625') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'url1' + expected_value = u'\\\\controller' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = u'[{0:s}] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + expected_msg_short = u'[{0:s}] {1:s}: \\\\cont...'.format( + key_path, regvalue_identifier) + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/usb.py b/plaso/parsers/winreg_plugins/usb.py new file mode 100644 index 0000000..08c9a6b --- /dev/null +++ b/plaso/parsers/winreg_plugins/usb.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors.# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the USB key plugin.""" + +import logging + +from plaso.events import windows_events +from plaso.lib import eventdata +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'Preston Miller, dpmforensics.com, github.com/prmiller91' + + +class USBPlugin(interface.KeyPlugin): + """USB Windows Registry plugin for last connection time.""" + + NAME = 'winreg_usb' + DESCRIPTION = u'Parser for USB storage Registry data.' + + REG_KEYS = [u'\\{current_control_set}\\Enum\\USB'] + REG_TYPE = 'SYSTEM' + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect SubKeys under USB and produce an event object for each one. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. 
+ """ + for subkey in key.GetSubkeys(): + text_dict = {} + text_dict['subkey_name'] = subkey.name + + vendor_identification = None + product_identification = None + try: + subkey_name_parts = subkey.name.split(u'&') + if len(subkey_name_parts) >= 2: + vendor_identification = subkey_name_parts[0] + product_identification = subkey_name_parts[1] + except ValueError as exception: + logging.warning( + u'Unable to split string: {0:s} with error: {1:s}'.format( + subkey.name, exception)) + + if vendor_identification and product_identification: + text_dict['vendor'] = vendor_identification + text_dict['product'] = product_identification + + for devicekey in subkey.GetSubkeys(): + text_dict['serial'] = devicekey.name + + # Last USB connection per USB device recorded in the Registry. + event_object = windows_events.WindowsRegistryEvent( + devicekey.last_written_timestamp, key.path, text_dict, + usage=eventdata.EventTimestamp.LAST_CONNECTED, offset=key.offset, + registry_type=registry_type, + source_append=': USB Entries') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(USBPlugin) diff --git a/plaso/parsers/winreg_plugins/usb_test.py b/plaso/parsers/winreg_plugins/usb_test.py new file mode 100644 index 0000000..0015c6a --- /dev/null +++ b/plaso/parsers/winreg_plugins/usb_test.py @@ -0,0 +1,80 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the USB Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import event +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import usb + + +__author__ = 'Preston Miller, dpmforensics.com, github.com/prmiller91' + + +class USBPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the USB Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = usb.USBPlugin() + + def testProcess(self): + """Tests the Process function.""" + knowledge_base_values = {u'current_control_set': u'ControlSet001'} + test_file_entry = self._GetTestFileEntryFromPath([u'SYSTEM']) + key_path = u'\\ControlSet001\\Enum\\USB' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, knowledge_base_values=knowledge_base_values, + file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 7) + + event_object = event_objects[3] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. 
+ self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_value = u'VID_0E0F&PID_0002' + self._TestRegvalue(event_object, u'subkey_name', expected_value) + self._TestRegvalue(event_object, u'vendor', u'VID_0E0F') + self._TestRegvalue(event_object, u'product', u'PID_0002') + + expected_msg = ( + r'[\ControlSet001\Enum\USB] product: PID_0002 serial: 6&2ab01149&0&2 ' + r'subkey_name: VID_0E0F&PID_0002 vendor: VID_0E0F') + + # Match UTC timestamp. + time = long(timelib_test.CopyStringToTimestamp( + u'2012-04-07 10:31:37.625246')) + self.assertEquals(event_object.timestamp, time) + + expected_msg_short = u'{0:s}...'.format(expected_msg[0:77]) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/usbstor.py b/plaso/parsers/winreg_plugins/usbstor.py new file mode 100644 index 0000000..86f8f88 --- /dev/null +++ b/plaso/parsers/winreg_plugins/usbstor.py @@ -0,0 +1,133 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors.# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the USBStor keys plugins.""" + +import logging + +from plaso.events import windows_events +from plaso.lib import eventdata +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class USBStorPlugin(interface.KeyPlugin): + """USBStor key plugin.""" + + NAME = 'winreg_usbstor' + DESCRIPTION = u'Parser for USB storage Registry data.' + + REG_KEYS = [u'\\{current_control_set}\\Enum\\USBSTOR'] + REG_TYPE = 'SYSTEM' + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect Values under USBStor and return an event object for each one. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + for subkey in key.GetSubkeys(): + text_dict = {} + text_dict['subkey_name'] = subkey.name + + # Time last USB device of this class was first inserted. + event_object = windows_events.WindowsRegistryEvent( + subkey.last_written_timestamp, key.path, text_dict, + usage=eventdata.EventTimestamp.FIRST_CONNECTED, offset=key.offset, + registry_type=registry_type, + source_append=': USBStor Entries') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + # TODO: Determine if these 4 fields always exist. 
+      try:
+        device_type, vendor, product, revision = subkey.name.split('&')
+      except ValueError as exception:
+        logging.warning(
+            u'Unable to split string: {0:s} with error: {1:s}'.format(
+                subkey.name, exception))
+        # Without the 4 fields there is nothing more to parse for this
+        # subkey, continue with the next one.
+        continue
+
+      text_dict['device_type'] = device_type
+      text_dict['vendor'] = vendor
+      text_dict['product'] = product
+      text_dict['revision'] = revision
+
+      for devicekey in subkey.GetSubkeys():
+        text_dict['serial'] = devicekey.name
+
+        friendly_name_value = devicekey.GetValue('FriendlyName')
+        if friendly_name_value:
+          text_dict['friendly_name'] = friendly_name_value.data
+        else:
+          text_dict.pop('friendly_name', None)
+
+        # ParentIdPrefix applies to Windows XP Only.
+        parent_id_prefix_value = devicekey.GetValue('ParentIdPrefix')
+        if parent_id_prefix_value:
+          text_dict['parent_id_prefix'] = parent_id_prefix_value.data
+        else:
+          text_dict.pop('parent_id_prefix', None)
+
+        # Win7 - Last Connection.
+        # Vista/XP - Time of an insert.
+        event_object = windows_events.WindowsRegistryEvent(
+            devicekey.last_written_timestamp, key.path, text_dict,
+            usage=eventdata.EventTimestamp.LAST_CONNECTED, offset=key.offset,
+            registry_type=registry_type,
+            source_append=': USBStor Entries')
+        parser_context.ProduceEvent(
+            event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+        # Build list of first Insertion times.
+        first_insert = []
+        device_parameter_key = devicekey.GetSubkey('Device Parameters')
+        if device_parameter_key:
+          first_insert.append(device_parameter_key.last_written_timestamp)
+
+        log_configuration_key = devicekey.GetSubkey('LogConf')
+        if (log_configuration_key and
+            log_configuration_key.last_written_timestamp not in first_insert):
+          first_insert.append(log_configuration_key.last_written_timestamp)
+
+        properties_key = devicekey.GetSubkey('Properties')
+        if (properties_key and
+            properties_key.last_written_timestamp not in first_insert):
+          first_insert.append(properties_key.last_written_timestamp)
+
+        # Add first Insertion times.
+        for timestamp in first_insert:
+          event_object = windows_events.WindowsRegistryEvent(
+              timestamp, key.path, text_dict,
+              usage=eventdata.EventTimestamp.LAST_CONNECTED, offset=key.offset,
+              registry_type=registry_type,
+              source_append=': USBStor Entries')
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+winreg.WinRegistryParser.RegisterPlugin(USBStorPlugin)
diff --git a/plaso/parsers/winreg_plugins/usbstor_test.py b/plaso/parsers/winreg_plugins/usbstor_test.py
new file mode 100644
index 0000000..add1b76
--- /dev/null
+++ b/plaso/parsers/winreg_plugins/usbstor_test.py
@@ -0,0 +1,84 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
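USBStor subkey names pack four '&'-separated fields, for example
Disk&Ven_HP&Prod_v100w&Rev_1024 in the test below. Since the TODO in the plugin
above notes that the four fields are not guaranteed, a defensive split looks
like this (a sketch, not the plugin's code):

    def SplitUsbStorSubkeyName(subkey_name):
      """Returns (device_type, vendor, product, revision) or None."""
      parts = subkey_name.split(u'&')
      if len(parts) != 4:
        return None
      return tuple(parts)

    assert SplitUsbStorSubkeyName(
        u'Disk&Ven_HP&Prod_v100w&Rev_1024') == (
            u'Disk', u'Ven_HP', u'Prod_v100w', u'Rev_1024')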
+"""Tests for the USBStor Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import usbstor + + +class USBStorPlugin(test_lib.RegistryPluginTestCase): + """Tests for the USBStor Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = usbstor.USBStorPlugin() + + def testProcess(self): + """Tests the Process function.""" + knowledge_base_values = {'current_control_set': u'ControlSet001'} + test_file_entry = self._GetTestFileEntryFromPath(['SYSTEM']) + key_path = u'\\ControlSet001\\Enum\\USBSTOR' + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, knowledge_base_values=knowledge_base_values, + file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 3) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.timestamp, 1333794697640871) + + expected_value = u'Disk&Ven_HP&Prod_v100w&Rev_1024' + self._TestRegvalue(event_object, u'subkey_name', expected_value) + + self._TestRegvalue(event_object, u'device_type', u'Disk') + self._TestRegvalue(event_object, u'vendor', u'Ven_HP') + self._TestRegvalue(event_object, u'product', u'Prod_v100w') + self._TestRegvalue(event_object, u'revision', u'Rev_1024') + + expected_msg = ( + u'[{0:s}] ' + u'device_type: Disk ' + u'friendly_name: HP v100w USB Device ' + u'product: Prod_v100w ' + u'revision: Rev_1024 ' + u'serial: AA951D0000007252&0 ' + u'subkey_name: Disk&Ven_HP&Prod_v100w&Rev_1024 ' + u'vendor: Ven_HP').format(key_path) + + expected_msg_short = ( + u'[{0:s}] ' + u'device_type: Disk ' + u'friendly_name: HP v100w USB D...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/userassist.py b/plaso/parsers/winreg_plugins/userassist.py new file mode 100644 index 0000000..da0cd2d --- /dev/null +++ b/plaso/parsers/winreg_plugins/userassist.py @@ -0,0 +1,208 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""This file contains the UserAssist Windows Registry plugin.""" + +import logging + +import construct + +from plaso.events import windows_events +from plaso.lib import timelib +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface +from plaso.winnt import environ_expand +from plaso.winnt import known_folder_ids + + +class UserAssistPlugin(interface.KeyPlugin): + """Plugin that parses an UserAssist key.""" + + NAME = 'winreg_userassist' + DESCRIPTION = u'Parser for User Assist Registry data.' + + REG_TYPE = 'NTUSER' + REG_KEYS = [ + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{FA99DFC7-6AC2-453A-A5E2-5E2AFF4507BD}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{F4E57C4B-2036-45F0-A9AB-443BCFE33D9F}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{F2A1CB5A-E3CC-4A2E-AF9D-505A7009D442}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{CEBFF5CD-ACE2-4F4F-9178-9926F41749EA}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{CAA59E3C-4792-41A5-9909-6A6A8D32490E}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{B267E3AD-A825-4A09-82B9-EEC22AA3B847}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{A3D53349-6E61-4557-8FC7-0028EDCEEBF6}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{9E04CAB2-CC14-11DF-BB8C-A2F1DED72085}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{75048700-EF1F-11D0-9888-006097DEACF9}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{5E6AB780-7743-11CF-A12B-00AA004AE837}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{0D6D4F41-2994-4BA0-8FEF-620E43CD2812}}'), + (u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer' + u'\\UserAssist\\{{BCB48336-4DDD-48FF-BB0B-D3190DACB3E2}}')] + + URL = [ + u'http://blog.didierstevens.com/programs/userassist/', + u'https://code.google.com/p/winreg-kb/wiki/UserAssistKeys', + u'http://intotheboxes.files.wordpress.com/2010/04' + u'/intotheboxes_2010_q1.pdf'] + + # UserAssist format version used in Windows 2000, XP, 2003, Vista. + USERASSIST_V3_STRUCT = construct.Struct( + 'userassist_entry', + construct.Padding(4), + construct.ULInt32('count'), + construct.ULInt64('timestamp')) + + # UserAssist format version used in Windows 2008, 7, 8. + USERASSIST_V5_STRUCT = construct.Struct( + 'userassist_entry', + construct.Padding(4), + construct.ULInt32('count'), + construct.ULInt32('app_focus_count'), + construct.ULInt32('focus_duration'), + construct.Padding(44), + construct.ULInt64('timestamp'), + construct.Padding(4)) + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Parses a UserAssist Registry key. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. 
+    """
+    version_value = key.GetValue('Version')
+    count_subkey = key.GetSubkey('Count')
+
+    if not version_value:
+      logging.error(u'missing version value')
+    elif not version_value.DataIsInteger():
+      logging.error(u'unsupported version value data type')
+    elif version_value.data not in [3, 5]:
+      logging.error(u'unsupported version: {0:d}'.format(version_value.data))
+    elif not count_subkey:
+      logging.error(u'missing count subkey')
+    else:
+      userassist_entry_index = 0
+
+      for value in count_subkey.GetValues():
+        try:
+          value_name = value.name.decode('rot-13')
+        except UnicodeEncodeError as exception:
+          logging.debug((
+              u'Unable to decode UserAssist string: {0:s} with error: {1:s}.\n'
+              u'Attempting piecewise decoding.').format(
+                  value.name, exception))
+
+          characters = []
+          for char in value.name:
+            if ord(char) < 128:
+              try:
+                characters.append(char.decode('rot-13'))
+              except UnicodeEncodeError:
+                characters.append(char)
+            else:
+              characters.append(char)
+
+          value_name = u''.join(characters)
+
+        if version_value.data == 5:
+          path_segments = value_name.split(u'\\')
+
+          for segment_index in range(0, len(path_segments)):
+            # Remove the { } from the path segment to get the GUID.
+            guid = path_segments[segment_index][1:-1]
+            path_segments[segment_index] = known_folder_ids.PATHS.get(
+                guid, path_segments[segment_index])
+
+          value_name = u'\\'.join(path_segments)
+          # Check if we might need to substitute values.
+          if '%' in value_name:
+            # TODO: deprecate direct use of pre_obj.
+            value_name = environ_expand.ExpandWindowsEnvironmentVariables(
+                value_name, parser_context.knowledge_base.pre_obj)
+
+        if not value.DataIsBinaryData():
+          logging.error(u'unsupported value data type: {0:s}'.format(
+              value.data_type_string))
+
+        elif version_value.data == 3:
+          if len(value.data) != self.USERASSIST_V3_STRUCT.sizeof():
+            logging.error(u'unsupported value data size: {0:d}'.format(
+                len(value.data)))
+
+          else:
+            parsed_data = self.USERASSIST_V3_STRUCT.parse(value.data)
+            filetime = parsed_data.get('timestamp', 0)
+            count = parsed_data.get('count', 0)
+
+            if count > 5:
+              count -= 5
+
+            text_dict = {}
+            text_dict[value_name] = u'[Count: {0:d}]'.format(count)
+            event_object = windows_events.WindowsRegistryEvent(
+                timelib.Timestamp.FromFiletime(filetime), count_subkey.path,
+                text_dict, offset=value.offset, registry_type=registry_type)
+            parser_context.ProduceEvent(
+                event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+        elif version_value.data == 5:
+          if len(value.data) != self.USERASSIST_V5_STRUCT.sizeof():
+            logging.error(u'unsupported value data size: {0:d}'.format(
+                len(value.data)))
+            # Without this guard the parse below would run on malformed
+            # data; continue mirrors the version 3 branch above.
+            continue
+
+          parsed_data = self.USERASSIST_V5_STRUCT.parse(value.data)
+
+          userassist_entry_index += 1
+          count = parsed_data.get('count', None)
+          app_focus_count = parsed_data.get('app_focus_count', None)
+          focus_duration = parsed_data.get('focus_duration', None)
+          timestamp = parsed_data.get('timestamp', 0)
+
+          text_dict = {}
+          text_dict[value_name] = (
+              u'[UserAssist entry: {0:d}, Count: {1:d}, '
+              u'Application focus count: {2:d}, Focus duration: {3:d}]').format(
+                  userassist_entry_index, count, app_focus_count,
+                  focus_duration)
+
+          event_object = windows_events.WindowsRegistryEvent(
+              timelib.Timestamp.FromFiletime(timestamp), count_subkey.path,
+              text_dict, offset=count_subkey.offset,
+              registry_type=registry_type)
+          parser_context.ProduceEvent(
+              event_object, parser_chain=parser_chain, file_entry=file_entry)
+
+
+winreg.WinRegistryParser.RegisterPlugin(UserAssistPlugin)
diff --git
a/plaso/parsers/winreg_plugins/userassist_test.py b/plaso/parsers/winreg_plugins/userassist_test.py new file mode 100644 index 0000000..594f298 --- /dev/null +++ b/plaso/parsers/winreg_plugins/userassist_test.py @@ -0,0 +1,112 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the UserAssist Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import userassist + + +class UserAssistPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the UserAssist Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = userassist.UserAssistPlugin() + + def testProcessOnWinXP(self): + """Tests the Process function on a Windows XP Registry file.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER.DAT']) + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\UserAssist' + u'\\{75048700-EF1F-11D0-9888-006097DEACF9}') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 14) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2009-08-04 15:11:22.811067') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'UEME_RUNPIDL:%csidl2%\\MSN.lnk' + expected_value = u'[Count: 14]' + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = u'[{0:s}\\Count] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + # The short message contains the first 76 characters of the key path. 
+ expected_msg_short = u'[{0:s}...'.format(key_path[:76]) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + def testProcessOnWin7(self): + """Tests the Process function on a Windows 7 Registry file.""" + test_file_entry = self._GetTestFileEntryFromPath(['NTUSER-WIN7.DAT']) + + key_path = ( + u'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\UserAssist' + u'\\{CEBFF5CD-ACE2-4F4F-9178-9926F41749EA}') + winreg_key = self._GetKeyFromFileEntry(test_file_entry, key_path) + event_queue_consumer = self._ParseKeyWithPlugin( + self._plugin, winreg_key, file_entry=test_file_entry) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 62) + + event_object = event_objects[0] + + self.assertEquals(event_object.pathspec, test_file_entry.path_spec) + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2010-11-10 07:49:37.078067') + self.assertEquals(event_object.timestamp, expected_timestamp) + + regvalue_identifier = u'Microsoft.Windows.GettingStarted' + expected_value = ( + u'[UserAssist entry: 1, Count: 14, Application focus count: 21, ' + u'Focus duration: 420000]') + self._TestRegvalue(event_object, regvalue_identifier, expected_value) + + expected_msg = u'[{0:s}\\Count] {1:s}: {2:s}'.format( + key_path, regvalue_identifier, expected_value) + # The short message contains the first 76 characters of the key path. + expected_msg_short = u'[{0:s}...'.format(key_path[:76]) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/winrar.py b/plaso/parsers/winreg_plugins/winrar.py new file mode 100644 index 0000000..5fe2a41 --- /dev/null +++ b/plaso/parsers/winreg_plugins/winrar.py @@ -0,0 +1,87 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains a parser for WinRAR for Plaso.""" + +import re + +from plaso.events import windows_events +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +__author__ = 'David Nides (david.nides@gmail.com)' + + +class WinRarHistoryPlugin(interface.KeyPlugin): + """Windows Registry plugin for parsing WinRAR History keys.""" + # TODO: Create NTUSER.DAT test file with WinRAR data. + + NAME = 'winreg_winrar' + DESCRIPTION = u'Parser for WinRAR History Registry data.' 
+ + REG_TYPE = 'NTUSER' + REG_KEYS = [ + u'\\Software\\WinRAR\\DialogEditHistory\\ExtrPath', + u'\\Software\\WinRAR\\DialogEditHistory\\ArcName', + u'\\Software\\WinRAR\\ArcHistory'] + + _RE_VALUE_NAME = re.compile(r'^[0-9]+$', re.I) + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Collect values under WinRAR ArcHistory and return event for each one. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + for value in key.GetValues(): + # Ignore any value not in the form: '[0-9]+'. + if not value.name or not self._RE_VALUE_NAME.search(value.name): + continue + + # Ignore any value that is empty or that does not contain a string. + if not value.data or not value.DataIsString(): + continue + + if value.name == '0': + timestamp = key.last_written_timestamp + else: + timestamp = 0 + + text_dict = {} + text_dict[value.name] = value.data + + # TODO: shouldn't this behavior be, put all the values + # into a single event object with the last written time of the key? + event_object = windows_events.WindowsRegistryEvent( + timestamp, key.path, text_dict, offset=key.offset, + registry_type=registry_type, + source_append=': WinRAR History') + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(WinRarHistoryPlugin) diff --git a/plaso/parsers/winreg_plugins/winrar_test.py b/plaso/parsers/winreg_plugins/winrar_test.py new file mode 100644 index 0000000..1a44bab --- /dev/null +++ b/plaso/parsers/winreg_plugins/winrar_test.py @@ -0,0 +1,83 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
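
A note on the value-selection rules in WinRarHistoryPlugin.GetEntries above: the following self-contained sketch applies the same rules to hypothetical (name, data) pairs; the paths and the key timestamp are made up for illustration:

import re

_RE_VALUE_NAME = re.compile(r'^[0-9]+$')

# Hypothetical ArcHistory values; '0' is always the most recent entry.
values = [
    ('0', u'C:\\Downloads\\most-recent.rar'),
    ('1', u'C:\\Downloads\\older.rar'),
    ('DontAskMe', u'not an MRU entry')]
key_last_written_timestamp = 1346145929000000  # made-up, in microseconds

for name, data in values:
  if not _RE_VALUE_NAME.search(name):
    continue  # only pure decimal value names are MRU entries
  # Only entry '0' can be tied to the key's last written time; the older
  # entries get a zero timestamp, exactly as in GetEntries above.
  timestamp = key_last_written_timestamp if name == '0' else 0
  print(name, timestamp, data)
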
+"""Tests for the WinRAR Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import winrar +from plaso.winreg import test_lib as winreg_test_lib + + +class WinRarArcHistoryPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the WinRAR ArcHistory Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = winrar.WinRarHistoryPlugin() + + def testProcess(self): + """Tests the Process function.""" + key_path = u'\\Software\\WinRAR\\ArcHistory' + + values = [] + values.append(winreg_test_lib.TestRegValue( + '0', 'C:\\Downloads\\The Sleeping Dragon CD1.iso'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=1892)) + values.append(winreg_test_lib.TestRegValue( + '1', 'C:\\Downloads\\plaso-static.rar'.encode('utf_16_le'), + winreg_test_lib.TestRegValue.REG_SZ, offset=612)) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-28 09:23:49.002031') + + winreg_key = winreg_test_lib.TestRegKey( + key_path, expected_timestamp, values, offset=1456) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 2) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + self.assertEquals(event_object.timestamp, expected_timestamp) + + expected_string = ( + u'[{0:s}] 0: C:\\Downloads\\The Sleeping Dragon CD1.iso').format( + key_path) + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + event_object = event_objects[1] + + self.assertEquals(event_object.timestamp, 0) + + expected_string = u'[{0:s}] 1: C:\\Downloads\\plaso-static.rar'.format( + key_path) + self._TestGetMessageStrings(event_object, expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_plugins/winver.py b/plaso/parsers/winreg_plugins/winver.py new file mode 100644 index 0000000..878e074 --- /dev/null +++ b/plaso/parsers/winreg_plugins/winver.py @@ -0,0 +1,102 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Plug-in to collect information about the Windows version.""" + +import construct + +from plaso.events import windows_events +from plaso.lib import timelib +from plaso.parsers import winreg +from plaso.parsers.winreg_plugins import interface + + +class WinVerPlugin(interface.KeyPlugin): + """Plug-in to collect information about the Windows version.""" + + NAME = 'winreg_winver' + DESCRIPTION = u'Parser for Windows version Registry data.' + + REG_KEYS = [u'\\Microsoft\\Windows NT\\CurrentVersion'] + REG_TYPE = 'SOFTWARE' + URLS = [] + + INT_STRUCT = construct.ULInt32('install') + + # TODO: Refactor remove this function in a later CL. + def GetValueString(self, key, value_name): + """Retrieves a specific string value from the Registry key. + + Args: + key: A Windows Registry key (instance of WinRegKey). + value_name: The name of the value. + + Returns: + A string value if one is available, otherwise an empty string. + """ + value = key.GetValue(value_name) + + if not value: + return '' + + if not value.data or not value.DataIsString(): + return '' + return value.data + + def GetEntries( + self, parser_context, key=None, registry_type=None, file_entry=None, + parser_chain=None, **unused_kwargs): + """Gather minimal information about system install and return an event. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: Optional Registry key (instance of winreg.WinRegKey). + The default is None. + registry_type: Optional Registry type string. The default is None. + file_entry: Optional file entry object (instance of dfvfs.FileEntry). + The default is None. + parser_chain: Optional string containing the parsing chain up to this + point. The default is None. + """ + text_dict = {} + text_dict[u'Owner'] = self.GetValueString(key, 'RegisteredOwner') + text_dict[u'sp'] = self.GetValueString(key, 'CSDBuildNumber') + text_dict[u'Product name'] = self.GetValueString(key, 'ProductName') + text_dict[u' Windows Version Information'] = u'' + + install_raw = key.GetValue('InstallDate').raw_data + # TODO: move this to a function in utils with a more descriptive name + # e.g. CopyByteStreamToInt32BigEndian. + try: + install = self.INT_STRUCT.parse(install_raw) + except construct.FieldError: + install = 0 + + event_object = windows_events.WindowsRegistryEvent( + timelib.Timestamp.FromPosixTime(install), key.path, text_dict, + usage='OS Install Time', offset=key.offset, + registry_type=registry_type, urls=self.URLS) + + event_object.prodname = text_dict[u'Product name'] + event_object.source_long = 'SOFTWARE WinVersion key' + if text_dict[u'Owner']: + event_object.owner = text_dict[u'Owner'] + parser_context.ProduceEvent( + event_object, parser_chain=parser_chain, file_entry=file_entry) + + +winreg.WinRegistryParser.RegisterPlugin(WinVerPlugin) diff --git a/plaso/parsers/winreg_plugins/winver_test.py b/plaso/parsers/winreg_plugins/winver_test.py new file mode 100644 index 0000000..4c69dc0 --- /dev/null +++ b/plaso/parsers/winreg_plugins/winver_test.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the WinVer Windows Registry plugin.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import winreg as winreg_formatter +from plaso.lib import timelib_test +from plaso.parsers.winreg_plugins import test_lib +from plaso.parsers.winreg_plugins import winver +from plaso.winreg import test_lib as winreg_test_lib + + +class WinVerPluginTest(test_lib.RegistryPluginTestCase): + """Tests for the WinVer Windows Registry plugin.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._plugin = winver.WinVerPlugin() + + def testWinVer(self): + """Test the WinVer plugin.""" + key_path = u'\\Microsoft\\Windows NT\\CurrentVersion' + values = [] + + values.append(winreg_test_lib.TestRegValue( + 'ProductName', 'MyTestOS'.encode('utf_16_le'), 1, 123)) + values.append(winreg_test_lib.TestRegValue( + 'CSDBuildNumber', '5'.encode('utf_16_le'), 1, 1892)) + values.append(winreg_test_lib.TestRegValue( + 'RegisteredOwner', 'A Concerned Citizen'.encode('utf_16_le'), 1, 612)) + values.append(winreg_test_lib.TestRegValue( + 'InstallDate', '\x13\x1aAP', 3, 1001)) + + winreg_key = winreg_test_lib.TestRegKey( + key_path, 1346445929000000, values, 153) + + event_queue_consumer = self._ParseKeyWithPlugin(self._plugin, winreg_key) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 1) + + event_object = event_objects[0] + + # This should just be the plugin name, as we're invoking it directly, + # and not through the parser. + self.assertEquals(event_object.parser, self._plugin.plugin_name) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2012-08-31 20:09:55') + self.assertEquals(event_object.timestamp, expected_timestamp) + + # Note that the double spaces here are intentional. + expected_msg = ( + u'[{0:s}] ' + u'Windows Version Information: ' + u'Owner: A Concerned Citizen ' + u'Product name: MyTestOS sp: 5').format(key_path) + + expected_msg_short = ( + u'[{0:s}] ' + u'Windows Version Information: ' + u'Owner: ...').format(key_path) + + self._TestGetMessageStrings(event_object, expected_msg, expected_msg_short) + # TODO: Write a test for a non-synthetic key + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/parsers/winreg_test.py b/plaso/parsers/winreg_test.py new file mode 100644 index 0000000..a20c46f --- /dev/null +++ b/plaso/parsers/winreg_test.py @@ -0,0 +1,106 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the Windows Registry file parser."""
+
+import unittest
+
+from plaso.parsers import test_lib
+from plaso.parsers import winreg
+
+
+class WinRegTest(test_lib.ParserTestCase):
+  """Tests for the Windows Registry file parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = winreg.WinRegistryParser()
+
+  def _GetParserChains(self, event_objects):
+    """Return a dict with a plugin count given a list of event objects."""
+    parser_chains = {}
+    for event_object in event_objects:
+      parser_chain = getattr(event_object, 'parser', None)
+      if not parser_chain:
+        continue
+
+      if parser_chain in parser_chains:
+        parser_chains[parser_chain] += 1
+      else:
+        parser_chains[parser_chain] = 1
+
+    return parser_chains
+
+  def _PluginNameToParserChain(self, plugin_name):
+    """Generate the correct parser chain for a given plugin."""
+    return 'winreg/{0:s}'.format(plugin_name)
+
+  def testNtuserParsing(self):
+    """Parse a NTUSER.DAT file and check a few items."""
+    knowledge_base_values = {'current_control_set': u'ControlSet001'}
+    test_file = self._GetTestFilePath(['NTUSER.DAT'])
+    event_queue_consumer = self._ParseFile(
+        self._parser, test_file, knowledge_base_values=knowledge_base_values)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    parser_chains = self._GetParserChains(event_objects)
+
+    # The _registry_type member is created dynamically during parsing.
+    registry_type = getattr(self._parser, '_registry_type', '')
+    self.assertEquals(registry_type, 'NTUSER')
+
+    expected_chain = self._PluginNameToParserChain('winreg_userassist')
+    self.assertTrue(expected_chain in parser_chains)
+
+    self.assertEquals(parser_chains[expected_chain], 14)
+
+  def testSystemParsing(self):
+    """Parse a SYSTEM hive and run a few tests."""
+    knowledge_base_values = {'current_control_set': u'ControlSet001'}
+    test_file = self._GetTestFilePath(['SYSTEM'])
+    event_queue_consumer = self._ParseFile(
+        self._parser, test_file, knowledge_base_values=knowledge_base_values)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    parser_chains = self._GetParserChains(event_objects)
+
+    # The _registry_type member is created dynamically during parsing.
+    registry_type = getattr(self._parser, '_registry_type', '')
+    self.assertEquals(registry_type, 'SYSTEM')
+
+    # Check the existence of a few known plugins, see if they
+    # are being properly picked up and are parsed.
+    plugin_names = ['winreg_usbstor', 'winreg_boot_execute', 'winreg_services']
+    for plugin in plugin_names:
+      expected_chain = self._PluginNameToParserChain(plugin)
+      self.assertTrue(
+          expected_chain in parser_chains,
+          u'Chain {0:s} not found in events.'.format(expected_chain))
+
+    # Check that the number of events produced by each plugin is correct.
+    self.assertEquals(parser_chains.get(
+        self._PluginNameToParserChain('winreg_usbstor'), 0), 3)
+    self.assertEquals(parser_chains.get(
+        self._PluginNameToParserChain('winreg_boot_execute'), 0), 2)
+    self.assertEquals(parser_chains.get(
+        self._PluginNameToParserChain('winreg_services'), 0), 831)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/xchatlog.py b/plaso/parsers/xchatlog.py
new file mode 100644
index 0000000..8ff1a6a
--- /dev/null
+++ b/plaso/parsers/xchatlog.py
@@ -0,0 +1,264 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a XChat log file parser in plaso.
+
+   Information updated 24 July 2013.
+
+   The parser applies to XChat log files. Despite their apparent
+   simplicity it's not straightforward to manage every possible case.
+   XChat allows users to specify how the timestamp is encoded (using
+   the strftime function) and to add extra separators. This parser
+   accepts only the simplest, default English form of an XChat log file,
+   as the following:
+
+   **** BEGIN LOGGING AT Mon Dec 31 21:11:55 2001
+
+   dec 31 21:11:55 --> You are now talking on #gugle
+   dec 31 21:11:55 --- Topic for #gugle is plaso, nobody knows what it means
+   dec 31 21:11:55 Topic for #gugle set by Kristinn
+   dec 31 21:11:55 --- Joachim gives voice to fpi
+   dec 31 21:11:55 * XChat here
+   dec 31 21:11:58 <fpi> ola plas-ing guys!
+   dec 31 21:12:00 <Kristinn> ftw!
+
+   The missing month/day case could be handled too, by extracting the
+   month/day information from the header, but the parser logic would
+   become intricate: it would need to manage the day transition for chat
+   lines crossing midnight. This is also the source of the last day of
+   the year bug, since the parser does not manage that transition.
+
+   Moreover strftime is locale-dependent, so month names, footers and
+   headers can change, even inside the same log file. That said, the
+   following is the main logic used to parse the log files (note that
+   the first header *must be* '**** BEGIN ...' otherwise the file will
+   be skipped).
+
+   1) Check for '****'
+   1.1) If 'BEGIN LOGGING AT' (English)
+   1.1.1) Extract the YEAR
+   1.1.2) Generate new event start logging
+   1.1.3) set parsing = True
+   1.2) If 'END LOGGING'
+   1.2.1) If parsing, set parsing=False
+   1.2.2) If not parsing, log debug
+   1.2.3) Generate new event end logging
+   1.3) If not BEGIN|END we are facing a different language
+   and we don't know which language!
+   If parsing is True, set parsing=False and log debug
+   2) Not '****' so we are parsing a line
+   2.1) If parsing = True, try to parse line and generate event
+   2.2) If parsing = False, skip until next good header is found
+
+   References
+   http://xchat.org
+"""
+
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class XChatLogEvent(time_events.TimestampEvent):
+  """Convenience class for a XChat Log line event."""
+  DATA_TYPE = 'xchat:log:line'
+
+  def __init__(self, timestamp, text, nickname=None):
+    """Initializes the event object.
+
+    Args:
+      timestamp: Microseconds since Epoch in UTC.
+      text: The text sent by nickname or other text (server, messages, etc.).
+      nickname: Optional nickname of the user that sent the text. The
+                default is None.
+    """
+    super(XChatLogEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.ADDED_TIME)
+    self.text = text
+    if nickname:
+      self.nickname = nickname
+
+
+class XChatLogParser(text_parser.PyparsingSingleLineTextParser):
+  """Parse XChat log files."""
+
+  NAME = 'xchatlog'
+  DESCRIPTION = u'Parser for XChat log files.'
+
+  ENCODING = 'UTF-8'
+
+  # Common (header/footer/body) pyparsing structures.
+  # TODO: Only English ASCII timestamp supported ATM, add support for others.
+  IGNORE_STRING = pyparsing.Word(pyparsing.printables).suppress()
+  LOG_ACTION = pyparsing.Word(
+      pyparsing.printables, min=3, max=5).setResultsName('log_action')
+  MONTH_NAME = pyparsing.Word(
+      pyparsing.printables, exact=3).setResultsName('month_name')
+  DAY = pyparsing.Word(pyparsing.nums, max=2).setParseAction(
+      text_parser.PyParseIntCast).setResultsName('day')
+  TIME = text_parser.PyparsingConstants.TIME.setResultsName('time')
+  YEAR = text_parser.PyparsingConstants.YEAR.setResultsName('year')
+  NICKNAME = pyparsing.QuotedString(
+      u'<', endQuoteChar=u'>').setResultsName('nickname')
+  TEXT = pyparsing.SkipTo(pyparsing.lineEnd).setResultsName('text')
+
+  # Header/footer pyparsing structures.
+  # Sample: "**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2011".
+  # Note that "BEGIN LOGGING" text is localized (default, English) and can be
+  # different if XChat locale is different.
+  HEADER_SIGNATURE = pyparsing.Keyword(u'****')
+  HEADER = (
+      HEADER_SIGNATURE.suppress() + LOG_ACTION +
+      pyparsing.Keyword(u'LOGGING AT').suppress() + IGNORE_STRING +
+      MONTH_NAME + DAY + TIME + YEAR)
+
+  # Body (nickname, text and/or service messages) pyparsing structures.
+  # Sample: "dec 31 21:11:58 <fpi> ola plas-ing guys!".
+  LOG_LINE = MONTH_NAME + DAY + TIME + pyparsing.Optional(NICKNAME) + TEXT
+
+  # Define the available log line structures.
+  LINE_STRUCTURES = [
+      ('logline', LOG_LINE),
+      ('header', HEADER),
+      ('header_signature', HEADER_SIGNATURE),
+  ]
+
+  def __init__(self):
+    """Initializes a XChatLog parser object."""
+    super(XChatLogParser, self).__init__()
+    self.offset = 0
+    self.xchat_year = 0
+
+  def _GetTimestamp(self, parse_result, timezone, year=0):
+    """Determines the timestamp from the pyparsing ParseResults.
+
+    Args:
+      parse_result: The pyparsing ParseResults object.
+      timezone: The timezone object.
+      year: Optional current year. The default is 0.
+
+    Returns:
+      A timelib timestamp or 0.
+ """ + month = timelib.MONTH_DICT.get(parse_result.month_name.lower(), None) + if not month: + logging.debug(u'XChatLog unmanaged month name [{0:s}]'.format( + parse_result.month_name)) + return 0 + + hour, minute, second = parse_result.time + if not year: + # This condition could happen when parsing the header line: if unable + # to get a valid year, returns a '0' timestamp, thus preventing any + # log line parsing (since xchat_year is unset to '0') until a new good + # (it means supported) header with a valid year information is found. + # TODO: reconsider this behaviour. + year = parse_result.get('year', 0) + + if not year: + return 0 + + self.xchat_year = year + + day = parse_result.get('day', 0) + return timelib.Timestamp.FromTimeParts( + year, month, day, hour, minute, second, timezone=timezone) + + def VerifyStructure(self, parser_context, line): + """Verify that this file is a XChat log file. + + Args: + parser_context: A parser context object (instance of ParserContext). + line: A single line from the text file. + + Returns: + True if this is the correct parser, False otherwise. + """ + try: + parse_result = self.HEADER.parseString(line) + except pyparsing.ParseException: + logging.debug(u'Unable to parse, not a valid XChat log file header') + return False + timestamp = self._GetTimestamp(parse_result, parser_context.timezone) + if not timestamp: + logging.debug(u'Wrong XChat timestamp: {0:s}'.format(parse_result)) + return False + # Unset the xchat_year since we are only verifying structure. + # The value gets set in _GetTimestamp during the actual parsing. + self.xchat_year = 0 + return True + + def ParseRecord(self, parser_context, key, structure): + """Parse each record structure and return an event object if applicable. + + Args: + parser_context: A parser context object (instance of ParserContext). + key: An identification string indicating the name of the parsed + structure. + structure: A pyparsing.ParseResults object from a line in the + log file. + + Returns: + An event object (instance of EventObject) or None. + """ + if key == 'logline': + if not self.xchat_year: + logging.debug(u'XChatLogParser, missing year information.') + return + timestamp = self._GetTimestamp( + structure, parser_context.timezone, year=self.xchat_year) + if not timestamp: + logging.debug(u'XChatLogParser, cannot get timestamp from line.') + return + # The text string contains multiple unnecessary whitespaces that need to + # be removed, thus the split and re-join. + return XChatLogEvent( + timestamp, u' '.join(structure.text.split()), structure.nickname) + elif key == 'header': + timestamp = self._GetTimestamp(structure, parser_context.timezone) + if not timestamp: + logging.warning(u'XChatLogParser, cannot get timestamp from header.') + return + if structure.log_action == u'BEGIN': + return XChatLogEvent(timestamp, u'XChat start logging') + elif structure.log_action == u'END': + # End logging, unset year. + self.xchat_year = 0 + return XChatLogEvent(timestamp, u'XChat end logging') + else: + logging.warning(u'Unknown log action: {0:s}.'.format( + structure.log_action)) + elif key == 'header_signature': + # If this key is matched (after others keys failed) we got a different + # localized header and we should stop parsing until a new good header + # is found. Stop parsing is done setting xchat_year to 0. + # Note that the code assumes that LINE_STRUCTURES will be used in the + # exact order as defined! 
+ logging.warning(u'Unknown locale header.') + self.xchat_year = 0 + else: + logging.warning( + u'Unable to parse record, unknown structure: {0:s}'.format(key)) + + +manager.ParsersManager.RegisterParser(XChatLogParser) diff --git a/plaso/parsers/xchatlog_test.py b/plaso/parsers/xchatlog_test.py new file mode 100644 index 0000000..429fadd --- /dev/null +++ b/plaso/parsers/xchatlog_test.py @@ -0,0 +1,101 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the xchatlog parser.""" + +import unittest + +# pylint: disable=unused-import +from plaso.formatters import xchatlog as xchatlog_formatter +from plaso.lib import timelib_test +from plaso.parsers import test_lib +from plaso.parsers import xchatlog + +import pytz + + +__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)' + + +class XChatLogUnitTest(test_lib.ParserTestCase): + """Tests for the xchatlog parser.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._parser = xchatlog.XChatLogParser() + + def testParse(self): + """Tests the Parse function.""" + knowledge_base_values = {'zone': pytz.timezone('Europe/Rome')} + test_file = self._GetTestFilePath(['xchat.log']) + event_queue_consumer = self._ParseFile( + self._parser, test_file, knowledge_base_values=knowledge_base_values) + event_objects = self._GetEventObjectsFromQueue(event_queue_consumer) + + self.assertEquals(len(event_objects), 9) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-12-31 21:11:55+01:00') + self.assertEquals(event_objects[0].timestamp, expected_timestamp) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-12-31 23:00:00+01:00') + self.assertEquals(event_objects[7].timestamp, expected_timestamp) + + expected_timestamp = timelib_test.CopyStringToTimestamp( + '2011-12-31 23:59:00+01:00') + self.assertEquals(event_objects[8].timestamp, expected_timestamp) + + expected_string = u'XChat start logging' + self._TestGetMessageStrings( + event_objects[0], expected_string, expected_string) + + expected_string = u'--> You are now talking on #gugle' + self._TestGetMessageStrings( + event_objects[1], expected_string, expected_string) + + expected_string = u'--- Topic for #gugle is plaso, a difficult word' + self._TestGetMessageStrings( + event_objects[2], expected_string, expected_string) + + expected_string = u'Topic for #gugle set by Kristinn' + self._TestGetMessageStrings( + event_objects[3], expected_string, expected_string) + + expected_string = u'--- Joachim gives voice to fpi' + self._TestGetMessageStrings( + event_objects[4], expected_string, expected_string) + + expected_string = u'* XChat here' + self._TestGetMessageStrings( + event_objects[5], expected_string, expected_string) + + expected_string = u'[nickname: fpi] ola plas-ing guys!' 
+    self._TestGetMessageStrings(
+        event_objects[6], expected_string, expected_string)
+
+    expected_string = u'[nickname: STRANGER] \u65e5\u672c'
+    self._TestGetMessageStrings(
+        event_objects[7], expected_string, expected_string)
+
+    expected_string = u'XChat end logging'
+    self._TestGetMessageStrings(
+        event_objects[8], expected_string, expected_string)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/parsers/xchatscrollback.py b/plaso/parsers/xchatscrollback.py
new file mode 100644
index 0000000..23d0fb0
--- /dev/null
+++ b/plaso/parsers/xchatscrollback.py
@@ -0,0 +1,212 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a XChat scrollback log file parser in plaso.
+
+   Information updated 06 September 2013.
+
+   Besides the logging capability, the XChat IRC client has the option to
+   record the text for opened tabs. So, when rejoining a particular channel
+   and/or a particular conversation, XChat will display the last messages
+   exchanged. This artifact could be present, if not disabled, even if
+   normal logging is disabled.
+
+   From the XChat FAQ (http://xchatdata.net/Using/FAQ):
+   Q: 'How do I keep text from previous sessions from being displayed when
+   I join a channel?'
+   A: 'Starting in XChat 2.8.4, XChat implemented the Scrollback feature which
+   displays text from the last time you had a particular tab open.
+   To disable this setting for all channels, Go to Settings -> Preferences
+   -> Logging and uncheck Display scrollback from previous session.
+   In XChat 2.8.6, XChat implemented both Per Channel Logging, and
+   Per Channel Scrollbacks. If you are on 2.8.6 or newer, you can disable
+   loading scrollback for just one particular tab name by right clicking on
+   the tab name, selecting Settings, and then unchecking Reload scrollback'
+
+   The log file format differs from the logging format, but it's quite
+   simple:
+
+   'T 1232315916 Python interface unloaded'
+   <T><space><timestamp><space><text><\n>
+
+   The time reported in the log is Unix Epoch (from source code, time(0)).
+   The <text> part could contain some 'decorators' (bold, underline, color
+   indications, etc.), so the parser should strip those control fields.
+
+   References
+   http://xchat.org
+"""
+
+import logging
+
+import pyparsing
+
+from plaso.events import time_events
+from plaso.lib import eventdata
+from plaso.lib import timelib
+from plaso.parsers import manager
+from plaso.parsers import text_parser
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class XChatScrollbackEvent(time_events.PosixTimeEvent):
+  """Convenience class for a XChat Scrollback line event."""
+  DATA_TYPE = 'xchat:scrollback:line'
+
+  def __init__(self, timestamp, offset, nickname, text):
+    """Initializes the event object.
+
+    Args:
+      timestamp: The timestamp time value, epoch.
+      offset: The offset of the event.
+      nickname: The nickname used.
+      text: The text sent by nickname or other text (server, messages, etc.).
+    """
+    super(XChatScrollbackEvent, self).__init__(
+        timestamp, eventdata.EventTimestamp.ADDED_TIME)
+    self.offset = offset
+    self.nickname = nickname
+    self.text = text
+
+
+class XChatScrollbackParser(text_parser.PyparsingSingleLineTextParser):
+  """Parse XChat scrollback log files."""
+
+  NAME = 'xchatscrollback'
+  DESCRIPTION = u'Parser for XChat scrollback log files.'
+
+  ENCODING = 'UTF-8'
+
+  # Define how a log line should look.
+  LOG_LINE = (
+      pyparsing.Literal(u'T').suppress() +
+      pyparsing.Word(pyparsing.nums).setResultsName('epoch') +
+      pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('text'))
+  LOG_LINE.parseWithTabs()
+
+  # Define the available log line structures.
+  LINE_STRUCTURES = [
+      ('logline', LOG_LINE),
+  ]
+
+  # Definitions for the stripping phase.
+  STRIPPER = (
+      pyparsing.Word(u'\x03', pyparsing.nums, max=3).suppress() |
+      pyparsing.Word(u'\x02\x07\x08\x0f\x16\x1d\x1f', exact=1).suppress())
+
+  # Define the structure for parsing <text> and get <nickname> and <text>.
+  MSG_NICK_START = pyparsing.Literal(u'<')
+  MSG_NICK_END = pyparsing.Literal(u'>')
+  MSG_NICK = pyparsing.SkipTo(MSG_NICK_END).setResultsName('nickname')
+  MSG_ENTRY_NICK = pyparsing.Optional(MSG_NICK_START + MSG_NICK + MSG_NICK_END)
+  MSG_ENTRY_TEXT = pyparsing.SkipTo(pyparsing.LineEnd()).setResultsName('text')
+  MSG_ENTRY = MSG_ENTRY_NICK + MSG_ENTRY_TEXT
+  MSG_ENTRY.parseWithTabs()
+
+  def __init__(self):
+    """Initializes a parser object."""
+    super(XChatScrollbackParser, self).__init__()
+    self.use_local_zone = False
+    self.offset = 0
+
+  def VerifyStructure(self, parser_context, line):
+    """Verify that this file is a XChat scrollback log file.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      line: A single line from the text file.
+
+    Returns:
+      True if this is the correct parser, False otherwise.
+    """
+    structure = self.LOG_LINE
+    parsed_structure = None
+    epoch = None
+    try:
+      parsed_structure = structure.parseString(line)
+    except pyparsing.ParseException:
+      logging.debug(u'Not a XChat scrollback log file')
+      return False
+    try:
+      epoch = int(parsed_structure.epoch)
+    except ValueError:
+      logging.debug(u'Not a XChat scrollback log file, invalid epoch string')
+      return False
+    if not timelib.Timestamp.FromPosixTime(epoch):
+      logging.debug(u'Not a XChat scrollback log file, invalid timestamp')
+      return False
+    return True
+
+  def ParseRecord(self, parser_context, key, structure):
+    """Parse each record structure and return an EventObject if applicable.
+
+    Args:
+      parser_context: A parser context object (instance of ParserContext).
+      key: An identification string indicating the name of the parsed
+           structure.
+      structure: A pyparsing.ParseResults object from a line in the
+                 log file.
+
+    Returns:
+      An event object (instance of EventObject) or None.
+    """
+    if key != 'logline':
+      logging.warning(
+          u'Unable to parse record, unknown structure: {0:s}'.format(key))
+      return
+
+    try:
+      epoch = int(structure.epoch)
+    except ValueError:
+      logging.debug(u'Invalid epoch string {0:s}, skipping record'.format(
+          structure.epoch))
+      return
+
+    try:
+      nickname, text = self._StripThenGetNicknameAndText(structure.text)
+    except pyparsing.ParseException:
+      logging.debug(u'Error parsing entry at offset {0:d}'.format(self.offset))
+      return
+
+    return XChatScrollbackEvent(epoch, self.offset, nickname, text)
+
+  def _StripThenGetNicknameAndText(self, text):
+    """Strips decorators from text and gets <nickname> if available.
+
+    This method implements the XChat strip_color2 and fe_print_text
+    functions, slightly modified to get pure text. From the parsing point
+    of view, after stripping, the code takes everything as is and simply
+    replaces tabs with spaces (as the original XChat code does). Since this
+    method does not raise any parse exception and accepts any content,
+    VerifyStructure plays an important role in checking that the source
+    file has the right format.
+
+    Args:
+      text: The text obtained from the record entry.
+
+    Returns:
+      A list containing two entries:
+        nickname: The nickname if present.
+        text: The text written by nickname or service messages.
+    """
+    stripped = self.STRIPPER.transformString(text)
+    structure = self.MSG_ENTRY.parseString(stripped)
+    text = structure.text.replace(u'\t', u' ')
+    return structure.nickname, text
+
+
+manager.ParsersManager.RegisterParser(XChatScrollbackParser)
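
A note on the stripping pass: the STRIPPER grammar defined above removes mIRC-style color and formatting control bytes via transformString. A self-contained illustration (the sample line is made up):

import pyparsing

STRIPPER = (
    pyparsing.Word(u'\x03', pyparsing.nums, max=3).suppress() |
    pyparsing.Word(u'\x02\x07\x08\x0f\x16\x1d\x1f', exact=1).suppress())

# '\x0312' selects color 12, '\x02' toggles bold, '\x0f' resets; all of
# these are suppressed, leaving only the pure text.
line = u'\x0312<fpi>\x0f Hi \x02Kristinn\x02!'
print(STRIPPER.transformString(line))
# Prints: <fpi> Hi Kristinn!

diff --git a/plaso/parsers/xchatscrollback_test.py b/plaso/parsers/xchatscrollback_test.py
new file mode 100644
index 0000000..745a217
--- /dev/null
+++ b/plaso/parsers/xchatscrollback_test.py
@@ -0,0 +1,87 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the xchatscrollback log parser."""
+
+import unittest
+
+# pylint: disable=unused-import
+from plaso.formatters import xchatscrollback as xchatscrollback_formatter
+from plaso.parsers import test_lib
+from plaso.parsers import xchatscrollback
+
+
+__author__ = 'Francesco Picasso (francesco.picasso@gmail.com)'
+
+
+class XChatScrollbackUnitTest(test_lib.ParserTestCase):
+  """Tests for the xchatscrollback log parser."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._parser = xchatscrollback.XChatScrollbackParser()
+
+  def testParse(self):
+    """Tests the Parse function."""
+    test_file = self._GetTestFilePath(['xchatscrollback.log'])
+    event_queue_consumer = self._ParseFile(self._parser, test_file)
+    event_objects = self._GetEventObjectsFromQueue(event_queue_consumer)
+
+    self.assertEquals(len(event_objects), 10)
+
+    # TODO: refactor this to use timelib_test.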
+ self.assertEquals(event_objects[0].timestamp, 1232074579000000) + self.assertEquals(event_objects[1].timestamp, 1232074587000000) + self.assertEquals(event_objects[2].timestamp, 1232315916000000) + self.assertEquals(event_objects[3].timestamp, 1232315916000000) + self.assertEquals(event_objects[4].timestamp, 1232959856000000) + self.assertEquals(event_objects[5].timestamp, 0) + self.assertEquals(event_objects[7].timestamp, 1232959862000000) + self.assertEquals(event_objects[8].timestamp, 1232959932000000) + self.assertEquals(event_objects[9].timestamp, 1232959993000000) + + expected_string = u'[] * Speaking now on ##plaso##' + self._TestGetMessageStrings( + event_objects[0], expected_string, expected_string) + + expected_string = u'[] * Joachim \xe8 uscito (Client exited)' + self._TestGetMessageStrings( + event_objects[1], expected_string, expected_string) + + expected_string = u'[] Tcl interface unloaded' + self._TestGetMessageStrings( + event_objects[2], expected_string, expected_string) + + expected_string = u'[] Python interface unloaded' + self._TestGetMessageStrings( + event_objects[3], expected_string, expected_string) + + expected_string = u'[] * Topic of #plasify \xe8: .' + self._TestGetMessageStrings( + event_objects[6], expected_string, expected_string) + + expected_string = u'[nickname: fpi] Hi Kristinn!' + self._TestGetMessageStrings( + event_objects[8], expected_string, expected_string) + + expected_string = u'[nickname: Kristinn] GO AND WRITE PARSERS!!! O_o' + self._TestGetMessageStrings( + event_objects[9], expected_string, expected_string) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/preprocessors/__init__.py b/plaso/preprocessors/__init__.py new file mode 100644 index 0000000..ae14266 --- /dev/null +++ b/plaso/preprocessors/__init__.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains an import statement for each preprocess plugin.""" + +from plaso.preprocessors import linux +from plaso.preprocessors import macosx +from plaso.preprocessors import windows diff --git a/plaso/preprocessors/interface.py b/plaso/preprocessors/interface.py new file mode 100644 index 0000000..2a8dfc4 --- /dev/null +++ b/plaso/preprocessors/interface.py @@ -0,0 +1,223 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains classes used for preprocessing in plaso."""
+
+import abc
+import logging
+
+from dfvfs.helpers import file_system_searcher
+
+from plaso.lib import errors
+
+
+class PreprocessPlugin(object):
+  """Class that defines the preprocess plugin object interface.
+
+  Any preprocessing plugin that implements this interface
+  should define which operating system this plugin supports.
+
+  The OS variable supports the following values:
+    Windows
+    Linux
+    MacOSX
+
+  Since some plugins may require knowledge gained from
+  other checks, all plugins have a weight associated with them.
+  The weight variable can have values from one to three:
+    1 - Requires no prior knowledge, can run immediately.
+    2 - Requires knowledge from plugins with weight 1.
+    3 - Requires knowledge from plugins with weight 2.
+
+  The default weight of 3 is assigned to plugins, so each
+  plugin needs to overwrite that value if needed.
+
+  The plugins are grouped by the operating system they work
+  on and then by their weight. That means that if the tool
+  is run against a Windows system, all plugins that support
+  Windows are grouped together; plugins with weight one are
+  run first, then those with weight two, followed by the rest
+  of the plugins with weight three. There is no priority or
+  guaranteed order among plugins that have the same weight, which
+  makes it important to define the weight appropriately.
+  """
+
+  # Defines the OS that this plugin supports.
+  SUPPORTED_OS = []
+
+  # Weight is an INT, with the value of 1-3.
+  WEIGHT = 3
+
+  # Defines the knowledge base attribute to be set.
+  ATTRIBUTE = ''
+
+  @property
+  def plugin_name(self):
+    """Return the name of the plugin."""
+    return self.__class__.__name__
+
+  def _FindFileEntry(self, searcher, path):
+    """Searches for a file entry that matches the path.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      path: The location of the file entry relative to the file system
+            of the searcher.
+
+    Returns:
+      The file entry if successful or None otherwise.
+
+    Raises:
+      errors.PreProcessFail: if the file entry cannot be found or opened.
+    """
+    find_spec = file_system_searcher.FindSpec(
+        location=path, case_sensitive=False)
+
+    path_specs = list(searcher.Find(find_specs=[find_spec]))
+    if not path_specs or len(path_specs) != 1:
+      raise errors.PreProcessFail(u'Unable to find: {0:s}'.format(path))
+
+    try:
+      file_entry = searcher.GetFileEntryByPathSpec(path_specs[0])
+    except IOError as exception:
+      raise errors.PreProcessFail(
+          u'Unable to retrieve file entry: {0:s} with error: {1:s}'.format(
+              path, exception))
+
+    return file_entry
+
+  @abc.abstractmethod
+  def GetValue(self, searcher, knowledge_base):
+    """Return the value for the attribute.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+    """
+    raise NotImplementedError
+
+  def Run(self, searcher, knowledge_base):
+    """Sets the attribute in the knowledge base to the value from GetValue.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+    """
+    value = self.GetValue(searcher, knowledge_base)
+    knowledge_base.SetValue(self.ATTRIBUTE, value)
+    value = knowledge_base.GetValue(self.ATTRIBUTE, default_value=u'N/A')
+    logging.info(u'[PreProcess] Set attribute: {0:s} to {1:s}'.format(
+        self.ATTRIBUTE, value))
+
+
+class PathPreprocessPlugin(PreprocessPlugin):
+  """Returns a path as found by the file system searcher."""
+
+  WEIGHT = 1
+
+  def GetValue(self, searcher, unused_knowledge_base):
+    """Returns the path as found by the searcher.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+
+    Returns:
+      The first path location string.
+
+    Raises:
+      PreProcessFail: if the path could not be found.
+    """
+    find_spec = file_system_searcher.FindSpec(
+        location_regex=self.PATH, case_sensitive=False)
+    path_specs = list(searcher.Find(find_specs=[find_spec]))
+
+    if not path_specs:
+      raise errors.PreProcessFail(
+          u'Unable to find path: {0:s}'.format(self.PATH))
+
+    relative_path = searcher.GetRelativePath(path_specs[0])
+    if not relative_path:
+      raise errors.PreProcessFail(
+          u'Missing relative path for: {0:s}'.format(self.PATH))
+
+    return relative_path
+
+
+def GuessOS(searcher):
+  """Returns a string representing what we think the underlying OS is.
+
+  The available return strings are:
+    Windows
+    MacOSX
+    Linux
+
+  Args:
+    searcher: The file system searcher object (instance of
+              dfvfs.FileSystemSearcher).
+
+  Returns:
+    A string indicating which OS we are dealing with.
+  """
+  find_specs = [
+      file_system_searcher.FindSpec(
+          location=u'/etc', case_sensitive=False),
+      file_system_searcher.FindSpec(
+          location=u'/System/Library', case_sensitive=False),
+      file_system_searcher.FindSpec(
+          location=u'/Windows/System32', case_sensitive=False),
+      file_system_searcher.FindSpec(
+          location=u'/WINNT/System32', case_sensitive=False),
+      file_system_searcher.FindSpec(
+          location=u'/WINNT35/System32', case_sensitive=False),
+      file_system_searcher.FindSpec(
+          location=u'/WTSRV/System32', case_sensitive=False)]
+
+  locations = []
+  for path_spec in searcher.Find(find_specs=find_specs):
+    relative_path = searcher.GetRelativePath(path_spec)
+    if relative_path:
+      locations.append(relative_path.lower())
+
+  # We need to check for both forward and backward slashes since the path
+  # spec will be OS dependent, as in running the tool on Windows will return
+  # Windows paths (backward slash) vs. forward slash on *NIX systems.
+  windows_locations = set([
+      u'/windows/system32', u'\\windows\\system32', u'/winnt/system32',
+      u'\\winnt\\system32', u'/winnt35/system32', u'\\winnt35\\system32',
+      u'\\wtsrv\\system32', u'/wtsrv/system32'])
+
+  if windows_locations.intersection(set(locations)):
+    return 'Windows'
+
+  if u'/system/library' in locations:
+    return 'MacOSX'
+
+  if u'/etc' in locations:
+    return 'Linux'
+
+  return 'None'
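
A note on GuessOS: a cut-down, forward-slash-only sketch of the same precedence logic is shown below; it is illustrative only and not part of the module:

def GuessOSFromLocations(locations):
  """Cut-down version of GuessOS: maps lowercased paths to an OS name."""
  locations = set(locations)
  windows_locations = set([
      u'/windows/system32', u'/winnt/system32', u'/winnt35/system32',
      u'/wtsrv/system32'])
  if windows_locations.intersection(locations):
    return 'Windows'
  if u'/system/library' in locations:
    return 'MacOSX'
  if u'/etc' in locations:
    return 'Linux'
  return 'None'

# The Windows check runs first, so an image that contains both /etc and
# /windows/system32 is classified as Windows.
print(GuessOSFromLocations([u'/etc', u'/windows/system32']))

diff --git a/plaso/preprocessors/linux.py b/plaso/preprocessors/linux.py
new file mode 100644
index 0000000..1e5075a
--- /dev/null
+++ b/plaso/preprocessors/linux.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.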
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains preprocessors for Linux."""
+
+import csv
+
+from dfvfs.helpers import text_file
+
+from plaso.lib import errors
+from plaso.preprocessors import interface
+from plaso.preprocessors import manager
+
+
+class LinuxHostname(interface.PreprocessPlugin):
+  """A preprocessing class that fetches the hostname on Linux."""
+
+  SUPPORTED_OS = ['Linux']
+  WEIGHT = 1
+  ATTRIBUTE = 'hostname'
+
+  def GetValue(self, searcher, unused_knowledge_base):
+    """Determines the hostname based on the contents of /etc/hostname.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+
+    Returns:
+      The hostname.
+
+    Raises:
+      errors.PreProcessFail: if the preprocessing fails.
+    """
+    path = u'/etc/hostname'
+    file_entry = self._FindFileEntry(searcher, path)
+    if not file_entry:
+      raise errors.PreProcessFail(
+          u'Unable to find file entry for path: {0:s}.'.format(path))
+
+    file_object = file_entry.GetFileObject()
+    file_data = file_object.read(512)
+    file_object.close()
+
+    hostname, _, _ = file_data.partition('\n')
+    return u'{0:s}'.format(hostname)
+
+
+class LinuxUsernames(interface.PreprocessPlugin):
+  """A preprocessing class that fetches usernames on Linux."""
+
+  SUPPORTED_OS = ['Linux']
+  WEIGHT = 1
+  ATTRIBUTE = 'users'
+
+  def GetValue(self, searcher, unused_knowledge_base):
+    """Determines the user information based on the contents of /etc/passwd.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+
+    Returns:
+      A list containing username information dicts.
+
+    Raises:
+      errors.PreProcessFail: if the preprocessing fails.
+    """
+    # TODO: Add passwd.cache, might be good if nss cache is enabled.
+
+    path = u'/etc/passwd'
+    file_entry = self._FindFileEntry(searcher, path)
+    if not file_entry:
+      raise errors.PreProcessFail(
+          u'Unable to find file entry for path: {0:s}.'.format(path))
+
+    file_object = file_entry.GetFileObject()
+    text_file_object = text_file.TextFile(file_object)
+
+    reader = csv.reader(text_file_object, delimiter=':')
+
+    users = []
+    for row in reader:
+      # TODO: as part of artifacts, create a proper object for this.
+      user = {
+          'uid': row[2],
+          'gid': row[3],
+          'name': row[0],
+          'path': row[5],
+          'shell': row[6]}
+      users.append(user)
+
+    file_object.close()
+    return users
+
+
+manager.PreprocessPluginsManager.RegisterPlugins([
+    LinuxHostname, LinuxUsernames])
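
A note on the column mapping used by LinuxUsernames.GetValue above: the illustrative sketch below uses split(':') in place of the csv.reader and two sample rows matching the test data further down:

PASSWD_LINES = [
    'root:x:0:0:root:/root:/bin/bash',
    'ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin']

users = []
for line in PASSWD_LINES:
  fields = line.split(':')
  # Same field indices as in GetValue: name, uid, gid, home path, shell.
  users.append({
      'uid': fields[2],
      'gid': fields[3],
      'name': fields[0],
      'path': fields[5],
      'shell': fields[6]})

print(users[1]['name'], users[1]['path'])
# Prints: ftp /var/ftp

diff --git a/plaso/preprocessors/linux_test.py b/plaso/preprocessors/linux_test.py
new file mode 100644
index 0000000..53095b5
--- /dev/null
+++ b/plaso/preprocessors/linux_test.py
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.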
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Linux preprocess plug-ins.""" + +import unittest + +from dfvfs.helpers import file_system_searcher +from dfvfs.path import fake_path_spec + +from plaso.artifacts import knowledge_base +from plaso.preprocessors import linux +from plaso.preprocessors import test_lib + + +class LinuxHostnameTest(test_lib.PreprocessPluginTest): + """Tests for the Linux hostname preprocess plug-in object.""" + + _FILE_DATA = ( + 'plaso.kiddaland.net\n') + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/etc/hostname', self._FILE_DATA) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + plugin = linux.LinuxHostname() + plugin.Run(self._searcher, knowledge_base_object) + + self.assertEquals(knowledge_base_object.hostname, u'plaso.kiddaland.net') + + +class LinuxUsernamesTest(test_lib.PreprocessPluginTest): + """Tests for the Linux usernames preprocess plug-in object.""" + + _FILE_DATA = ( + 'root:x:0:0:root:/root:/bin/bash\n' + 'bin:x:1:1:bin:/bin:/sbin/nologin\n' + 'daemon:x:2:2:daemon:/sbin:/sbin/nologin\n' + 'adm:x:3:4:adm:/var/adm:/sbin/nologin\n' + 'lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin\n' + 'sync:x:5:0:sync:/sbin:/bin/sync\n' + 'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n' + 'halt:x:7:0:halt:/sbin:/sbin/halt\n' + 'mail:x:8:12:mail:/var/spool/mail:/sbin/nologin\n' + 'operator:x:11:0:operator:/root:/sbin/nologin\n' + 'games:x:12:100:games:/usr/games:/sbin/nologin\n' + 'ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin\n' + 'nobody:x:99:99:Nobody:/:/sbin/nologin\n') + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/etc/passwd', self._FILE_DATA) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + plugin = linux.LinuxUsernames() + plugin.Run(self._searcher, knowledge_base_object) + + users = knowledge_base_object.GetValue('users') + self.assertEquals(len(users), 13) + + self.assertEquals(users[11].get('uid', None), u'14') + self.assertEquals(users[11].get('gid', None), u'50') + self.assertEquals(users[11].get('name', None), u'ftp') + self.assertEquals(users[11].get('path', None), u'/var/ftp') + self.assertEquals(users[11].get('shell', None), u'/sbin/nologin') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/preprocessors/macosx.py b/plaso/preprocessors/macosx.py new file mode 100644 index 0000000..5ae5843 --- /dev/null +++ b/plaso/preprocessors/macosx.py @@ -0,0 +1,390 @@ 
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains preprocessors for Mac OS X."""
+
+import logging
+
+from binplist import binplist
+from dfvfs.helpers import file_system_searcher
+from xml.etree import ElementTree
+
+from plaso.lib import errors
+from plaso.lib import utils
+from plaso.parsers.plist_plugins import interface as plist_interface
+from plaso.preprocessors import interface
+from plaso.preprocessors import manager
+
+
+class PlistPreprocessPlugin(interface.PreprocessPlugin):
+  """Class that defines the plist preprocess plugin object."""
+
+  SUPPORTED_OS = ['MacOSX']
+  WEIGHT = 2
+
+  # Path to the plist file to be parsed; this can depend on paths
+  # discovered by previous preprocessors.
+  PLIST_PATH = ''
+
+  # The keys whose value should be returned. This is an ordered list, in
+  # order of preference: the value of the first key that is found is
+  # returned and no further keys are searched.
+  PLIST_KEYS = ['']
+
+  def GetValue(self, searcher, unused_knowledge_base):
+    """Returns a value retrieved from keys within a plist file.
+
+    The names of the keys are defined in PLIST_KEYS.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+
+    Returns:
+      The value of the first key that is found.
+
+    Raises:
+      errors.PreProcessFail: if the preprocessing fails.
+    """
+    file_entry = self._FindFileEntry(searcher, self.PLIST_PATH)
+    if not file_entry:
+      raise errors.PreProcessFail(
+          u'Unable to open file: {0:s}'.format(self.PLIST_PATH))
+
+    file_object = file_entry.GetFileObject()
+    value = self.ParseFile(file_entry, file_object)
+    file_object.close()
+
+    return value
+
+  def ParseFile(self, file_entry, file_object):
+    """Parses the plist file and returns the parsed key.
+
+    Args:
+      file_entry: The file entry (instance of dfvfs.FileEntry).
+      file_object: The file-like object.
+
+    Returns:
+      The value of the first key defined by PLIST_KEYS that is found.
+
+    Raises:
+      errors.PreProcessFail: if the preprocessing fails.
+ """ + try: + plist_file = binplist.BinaryPlist(file_object) + top_level_object = plist_file.Parse() + + except binplist.FormatError as exception: + raise errors.PreProcessFail( + u'File is not a plist: {0:s} with error: {1:s}'.format( + file_entry.path_spec.comparable, exception)) + + except OverflowError as exception: + raise errors.PreProcessFail( + u'Unable to process plist: {0:s} with error: {1:s}'.format( + file_entry.path_spec.comparable, exception)) + + if not plist_file: + raise errors.PreProcessFail( + u'File is not a plist: {0:s}'.format(file_entry.path_spec.comparable)) + + match = None + key_name = '' + for plist_key in self.PLIST_KEYS: + try: + match = plist_interface.GetKeys( + top_level_object, frozenset([plist_key])) + except KeyError: + continue + if match: + key_name = plist_key + break + + if not match: + raise errors.PreProcessFail( + u'Keys not found inside plist file: {0:s}.'.format( + u','.join(self.PLIST_KEYS))) + + return self.ParseKey(match, key_name) + + def ParseKey(self, key, key_name): + """Retrieves a specific value from the key. + + Args: + key: The key object (instance of dict). + key_name: The name of the key. + + Returns: + The value of the key defined by key_name. + + Raises: + errors.PreProcessFail: if the preprocessing fails. + """ + value = key.get(key_name, None) + if not value: + raise errors.PreProcessFail( + u'Value of key: {0:s} not found.'.format(key_name)) + + return value + + +class XMLPlistPreprocessPlugin(PlistPreprocessPlugin): + """Class that defines the Mac OS X XML plist preprocess plugin object.""" + + def _GetKeys(self, xml_root, key_name): + """Return a dict with the requested keys.""" + match = {} + + generator = xml_root.iter() + for key in generator: + if 'key' in key.tag and key_name in key.text: + value_key = generator.next() + value = '' + for subkey in value_key.iter(): + if 'string' in subkey.tag: + value = subkey.text + match[key.text] = value + + # Now we need to go over the match dict and retrieve values. + return match + + def ParseFile(self, file_entry, file_object): + """Parse the file and return parsed key. + + Args: + file_entry: The file entry (instance of dfvfs.FileEntry). + file_object: The file-like object. + + Returns: + The value of the first key defined by PLIST_KEYS that is found. + + Raises: + errors.PreProcessFail: if the preprocessing fails. + """ + # TODO: Move to defusedxml for safer XML parsing. 
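+    # The parent class handles binary plists via binplist; this override
+    # parses the XML flavor of plists from the same file-like object with
+    # ElementTree instead.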
+ try: + xml = ElementTree.parse(file_object) + except ElementTree.ParseError: + raise errors.PreProcessFail(u'File is not a XML file.') + except IOError: + raise errors.PreProcessFail(u'File is not a XML file.') + + xml_root = xml.getroot() + key_name = '' + match = None + for key in self.PLIST_KEYS: + match = self._GetKeys(xml_root, key) + if match: + key_name = key + break + + if not match: + raise errors.PreProcessFail( + u'Keys not found inside plist file: {0:s}.'.format( + u','.join(self.PLIST_KEYS))) + + return self.ParseKey(match, key_name) + + +class MacOSXBuild(XMLPlistPreprocessPlugin): + """Fetches build information about a Mac OS X system.""" + + ATTRIBUTE = 'build' + PLIST_PATH = '/System/Library/CoreServices/SystemVersion.plist' + + PLIST_KEYS = ['ProductUserVisibleVersion'] + + +class MacOSXHostname(XMLPlistPreprocessPlugin): + """Fetches hostname information about a Mac OS X system.""" + + ATTRIBUTE = 'hostname' + PLIST_PATH = '/Library/Preferences/SystemConfiguration/preferences.plist' + + PLIST_KEYS = ['ComputerName', 'LocalHostName'] + + +class MacOSXKeyboard(PlistPreprocessPlugin): + """Fetches keyboard information from a Mac OS X system.""" + + ATTRIBUTE = 'keyboard_layout' + PLIST_PATH = '/Library/Preferences/com.apple.HIToolbox.plist' + + PLIST_KEYS = ['AppleCurrentKeyboardLayoutInputSourceID'] + + def ParseKey(self, key, key_name): + """Determines the keyboard layout.""" + value = super(MacOSXKeyboard, self).ParseKey(key, key_name) + if type(value) in (list, tuple): + value = value[0] + _, _, keyboard_layout = value.rpartition('.') + + return keyboard_layout + + +class MacOSXTimeZone(interface.PreprocessPlugin): + """Gather timezone information from a Mac OS X system.""" + + ATTRIBUTE = 'time_zone_str' + SUPPORTED_OS = ['MacOSX'] + + WEIGHT = 1 + + ZONE_FILE_PATH = u'/private/etc/localtime' + + def GetValue(self, searcher, unused_knowledge_base): + """Determines the local time zone settings. + + Args: + searcher: The file system searcher object (instance of + dfvfs.FileSystemSearcher). + knowledge_base: A knowledge base object (instance of KnowledgeBase), + which contains information from the source data needed + for parsing. + + Returns: + The local timezone settings. + + Raises: + errors.PreProcessFail: if the preprocessing fails. + """ + path = self.ZONE_FILE_PATH + file_entry = self._FindFileEntry(searcher, path) + if not file_entry: + raise errors.PreProcessFail( + u'Unable to find file: {0:s}'.format(path)) + + if not file_entry.link: + raise errors.PreProcessFail( + u'Unable to retrieve timezone information from: {0:s}.'.format(path)) + + _, _, zone = file_entry.link.partition(u'zoneinfo/') + return zone + + +class MacOSXUsers(interface.PreprocessPlugin): + """Get information about user accounts on a Mac OS X system.""" + + SUPPORTED_OS = ['MacOSX'] + ATTRIBUTE = 'users' + WEIGHT = 1 + + # Define the path to the user account information. + USER_PATH = '/private/var/db/dslocal/nodes/Default/users/[^_].+.plist' + + _KEYS = frozenset(['name', 'uid', 'home', 'realname']) + + def _OpenPlistFile(self, searcher, path_spec): + """Open a Plist file given a path and returns a plist top level object. + + Args: + searcher: The file system searcher object (instance of + dfvfs.FileSystemSearcher). + path_spec: The path specification (instance of dfvfs.PathSpec) + of the plist file. + + Raises: + errors.PreProcessFail: if the preprocessing fails. 
+ """ + plist_file_location = getattr(path_spec, 'location', u'') + file_entry = searcher.GetFileEntryByPathSpec(path_spec) + file_object = file_entry.GetFileObject() + + try: + plist_file = binplist.BinaryPlist(file_object) + top_level_object = plist_file.Parse() + + except binplist.FormatError as exception: + exception = utils.GetUnicodeString(exception) + raise errors.PreProcessFail( + u'File is not a plist: {0:s}'.format(exception)) + + except OverflowError as exception: + raise errors.PreProcessFail( + u'Error processing: {0:s} with error: {1:s}'.format( + plist_file_location, exception)) + + if not plist_file: + raise errors.PreProcessFail( + u'File is not a plist: {0:s}'.format(plist_file_location)) + + return top_level_object + + def GetValue(self, searcher, unused_knowledge_base): + """Determines the user accounts. + + Args: + searcher: The file system searcher object (instance of + dfvfs.FileSystemSearcher). + knowledge_base: A knowledge base object (instance of KnowledgeBase), + which contains information from the source data needed + for parsing. + + Returns: + A list containing username information dicts. + + Raises: + errors.PreProcessFail: if the preprocessing fails. + """ + find_spec = file_system_searcher.FindSpec( + location_regex=self.USER_PATH, case_sensitive=False) + + path_specs = list(searcher.Find(find_specs=[find_spec])) + if not path_specs: + raise errors.PreProcessFail(u'Unable to find user plist files.') + + users = [] + for path_spec in path_specs: + plist_file_location = getattr(path_spec, 'location', u'') + if not plist_file_location: + raise errors.PreProcessFail(u'Missing user plist file location.') + + try: + top_level_object = self._OpenPlistFile(searcher, path_spec) + except IOError: + logging.warning(u'Unable to parse user plist file: {0:s}'.format( + plist_file_location)) + continue + + try: + match = plist_interface.GetKeysDefaultEmpty( + top_level_object, self._KEYS) + except KeyError as exception: + logging.warning( + u'Unable to read user plist file: {0:s} with error: {1:s}'.format( + plist_file_location, exception)) + continue + + # TODO: as part of artifacts, create a proper object for this. + user = { + 'uid': match.get('uid', [-1])[0], + 'path': match.get('home', [u''])[0], + 'name': match.get('name', [u''])[0], + 'realname': match.get('realname', [u'N/A'])[0]} + users.append(user) + + if not users: + raise errors.PreProcessFail(u'Unable to find any users on the system.') + + return users + + +manager.PreprocessPluginsManager.RegisterPlugins([ + MacOSXBuild, MacOSXHostname, MacOSXKeyboard, MacOSXTimeZone, MacOSXUsers]) diff --git a/plaso/preprocessors/macosx_test.py b/plaso/preprocessors/macosx_test.py new file mode 100644 index 0000000..5e80c7b --- /dev/null +++ b/plaso/preprocessors/macosx_test.py @@ -0,0 +1,221 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
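A hypothetical sketch of declaring a new plist-based plugin on top of the classes above (the attribute, plist path and key names are illustrative, not part of this commit); the first key in PLIST_KEYS that is found wins:

    from plaso.preprocessors import macosx
    from plaso.preprocessors import manager


    class LoginWindowPlugin(macosx.XMLPlistPreprocessPlugin):
      """Hypothetical plug-in that fetches the last logged-in user."""

      ATTRIBUTE = 'last_user'
      PLIST_PATH = '/Library/Preferences/com.apple.loginwindow.plist'

      # Ordered by preference; searching stops at the first key found.
      PLIST_KEYS = ['lastUserName', 'lastUser']


    manager.PreprocessPluginsManager.RegisterPlugin(LoginWindowPlugin)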
+"""Tests for the Mac OS X preprocess plug-ins."""
+
+import os
+import unittest
+
+from dfvfs.helpers import file_system_searcher
+from dfvfs.path import fake_path_spec
+
+from plaso.artifacts import knowledge_base
+from plaso.preprocessors import macosx
+from plaso.preprocessors import test_lib
+
+
+class MacOSXBuildTest(test_lib.PreprocessPluginTest):
+  """Tests for the Mac OS X build information preprocess plug-in object."""
+
+  _FILE_DATA = (
+      '<?xml version="1.0" encoding="UTF-8"?>\n'
+      '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
+      '"http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
+      '<plist version="1.0">\n'
+      '<dict>\n'
+      '\t<key>ProductBuildVersion</key>\n'
+      '\t<string>13C64</string>\n'
+      '\t<key>ProductCopyright</key>\n'
+      '\t<string>1983-2014 Apple Inc.</string>\n'
+      '\t<key>ProductName</key>\n'
+      '\t<string>Mac OS X</string>\n'
+      '\t<key>ProductUserVisibleVersion</key>\n'
+      '\t<string>10.9.2</string>\n'
+      '\t<key>ProductVersion</key>\n'
+      '\t<string>10.9.2</string>\n'
+      '</dict>\n'
+      '</plist>\n')
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._fake_file_system = self._BuildSingleFileFakeFileSystem(
+        u'/System/Library/CoreServices/SystemVersion.plist',
+        self._FILE_DATA)
+
+    mount_point = fake_path_spec.FakePathSpec(location=u'/')
+    self._searcher = file_system_searcher.FileSystemSearcher(
+        self._fake_file_system, mount_point)
+
+  def testGetValue(self):
+    """Tests the GetValue function."""
+    knowledge_base_object = knowledge_base.KnowledgeBase()
+
+    plugin = macosx.MacOSXBuild()
+    plugin.Run(self._searcher, knowledge_base_object)
+
+    build = knowledge_base_object.GetValue('build')
+    self.assertEquals(build, u'10.9.2')
+
+
+class MacOSXHostname(test_lib.PreprocessPluginTest):
+  """Tests for the Mac OS X hostname preprocess plug-in object."""
+
+  # Note that this is only part of the normal preferences.plist file data.
+  _FILE_DATA = (
+      '<?xml version="1.0" encoding="UTF-8"?>\n'
+      '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
+      '"http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
+      '<plist version="1.0">\n'
+      '<dict>\n'
+      '\t<key>System</key>\n'
+      '\t<dict>\n'
+      '\t\t<key>Network</key>\n'
+      '\t\t<dict>\n'
+      '\t\t\t<key>HostNames</key>\n'
+      '\t\t\t<dict>\n'
+      '\t\t\t\t<key>LocalHostName</key>\n'
+      '\t\t\t\t<string>Plaso\'s Mac mini</string>\n'
+      '\t\t\t</dict>\n'
+      '\t\t</dict>\n'
+      '\t\t<key>System</key>\n'
+      '\t\t<dict>\n'
+      '\t\t\t<key>ComputerName</key>\n'
+      '\t\t\t<string>Plaso\'s Mac mini</string>\n'
+      '\t\t\t<key>ComputerNameEncoding</key>\n'
+      '\t\t\t<integer>0</integer>\n'
+      '\t\t</dict>\n'
+      '\t</dict>\n'
+      '</dict>\n'
+      '</plist>\n')
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    self._fake_file_system = self._BuildSingleFileFakeFileSystem(
+        u'/Library/Preferences/SystemConfiguration/preferences.plist',
+        self._FILE_DATA)
+
+    mount_point = fake_path_spec.FakePathSpec(location=u'/')
+    self._searcher = file_system_searcher.FileSystemSearcher(
+        self._fake_file_system, mount_point)
+
+  def testGetValue(self):
+    """Tests the GetValue function."""
+    knowledge_base_object = knowledge_base.KnowledgeBase()
+
+    plugin = macosx.MacOSXHostname()
+    plugin.Run(self._searcher, knowledge_base_object)
+
+    self.assertEquals(knowledge_base_object.hostname, u'Plaso\'s Mac mini')
+
+
+class MacOSXKeyboard(test_lib.PreprocessPluginTest):
+  """Tests for the Mac OS X keyboard layout preprocess plug-in object."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    file_object = open(os.path.join(
+        self._TEST_DATA_PATH, u'com.apple.HIToolbox.plist'))
+    file_data = file_object.read()
+    file_object.close()
+
+    self._fake_file_system = self._BuildSingleFileFakeFileSystem(
+        u'/Library/Preferences/com.apple.HIToolbox.plist',
+        file_data)
+
+    mount_point = fake_path_spec.FakePathSpec(location=u'/')
+    self._searcher = file_system_searcher.FileSystemSearcher(
+        self._fake_file_system, mount_point)
+
+  def testGetValue(self):
+    """Tests the GetValue function."""
+    knowledge_base_object = knowledge_base.KnowledgeBase()
+
+    plugin = macosx.MacOSXKeyboard()
+    plugin.Run(self._searcher, knowledge_base_object)
+
+    keyboard_layout = 
knowledge_base_object.GetValue('keyboard_layout') + self.assertEquals(keyboard_layout, u'US') + + +class MacOSXTimezone(test_lib.PreprocessPluginTest): + """Tests for the Mac OS X timezone preprocess plug-in object.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._fake_file_system = self._BuildSingleLinkFakeFileSystem( + u'/private/etc/localtime', u'/usr/share/zoneinfo/Europe/Amsterdam') + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + plugin = macosx.MacOSXTimeZone() + plugin.Run(self._searcher, knowledge_base_object) + + time_zone_str = knowledge_base_object.GetValue('time_zone_str') + self.assertEquals(time_zone_str, u'Europe/Amsterdam') + + +class MacOSXUsersTest(test_lib.PreprocessPluginTest): + """Tests for the Mac OS X usernames preprocess plug-in object.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + file_object = open(os.path.join( + self._TEST_DATA_PATH, u'com.apple.HIToolbox.plist')) + file_data = file_object.read() + file_object.close() + + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/private/var/db/dslocal/nodes/Default/users/nobody.plist', + file_data) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + plugin = macosx.MacOSXUsers() + plugin.Run(self._searcher, knowledge_base_object) + + users = knowledge_base_object.GetValue('users') + self.assertEquals(len(users), 1) + + # TODO: fix the parsing of the following values to match the behavior on + # Mac OS X. + + # The string -2 is converted into the integer -1. + self.assertEquals(users[0].get('uid', None), -1) + # 'home' is 0 which represents: /var/empty but we convert it + # into u''. + self.assertEquals(users[0].get('path', None), u'') + # 'name' is 0 which represents: nobody but we convert it into u''. + self.assertEquals(users[0].get('name', None), u'') + # 'realname' is 0 which represents: 'Unprivileged User' but we convert it + # into u'N/A'. + self.assertEquals(users[0].get('realname', None), u'N/A') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/preprocessors/manager.py b/plaso/preprocessors/manager.py new file mode 100644 index 0000000..d70d46e --- /dev/null +++ b/plaso/preprocessors/manager.py @@ -0,0 +1,138 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
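The MacOSXTimeZone plug-in exercised above never reads file contents; it derives the zone purely from the symlink target. The core of it, runnable on its own:

    # The /private/etc/localtime symlink points into the zoneinfo database;
    # everything after 'zoneinfo/' is the Olson zone name.
    link = u'/usr/share/zoneinfo/Europe/Amsterdam'

    _, _, zone = link.partition(u'zoneinfo/')
    assert zone == u'Europe/Amsterdam'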
+"""The preprocess plugins manager."""
+
+import logging
+
+from plaso.lib import errors
+
+
+class PreprocessPluginsManager(object):
+  """Class that implements the preprocess plugins manager."""
+
+  _plugin_classes = {}
+
+  @classmethod
+  def _GetPluginsByWeight(cls, platform, weight):
+    """Retrieves all plugins for a specific platform of a certain weight.
+
+    Args:
+      platform: A string containing the supported operating system
+                of the plugin.
+      weight: An integer containing the weight of the plugin.
+
+    Yields:
+      Preprocess plugin objects that match the platform and weight.
+    """
+    for plugin_class in cls._plugin_classes.itervalues():
+      plugin_supported_os = getattr(plugin_class, 'SUPPORTED_OS', [])
+      plugin_weight = getattr(plugin_class, 'WEIGHT', 0)
+      if platform in plugin_supported_os and weight == plugin_weight:
+        yield plugin_class()
+
+  @classmethod
+  def _GetWeights(cls, platform):
+    """Returns a list of all weights that are used by preprocessing plugins.
+
+    Args:
+      platform: A string containing the supported operating system
+                of the plugin.
+
+    Returns:
+      A sorted list of weights.
+    """
+    weights = {}
+    for plugin_class in cls._plugin_classes.itervalues():
+      plugin_supported_os = getattr(plugin_class, 'SUPPORTED_OS', [])
+      plugin_weight = getattr(plugin_class, 'WEIGHT', 0)
+      if platform in plugin_supported_os:
+        weights[plugin_weight] = 1
+
+    return sorted(weights.keys())
+
+  @classmethod
+  def DeregisterPlugin(cls, plugin_class):
+    """Deregisters a plugin class.
+
+    Args:
+      plugin_class: the class object of the plugin.
+
+    Raises:
+      KeyError: if plugin class is not set for the corresponding name.
+    """
+    if plugin_class.__name__ not in cls._plugin_classes:
+      raise KeyError(
+          u'Plugin class not set for name: {0:s}.'.format(
+              plugin_class.__name__))
+
+    del cls._plugin_classes[plugin_class.__name__]
+
+  @classmethod
+  def RegisterPlugin(cls, plugin_class):
+    """Registers a plugin class.
+
+    Args:
+      plugin_class: the class object of the plugin.
+
+    Raises:
+      KeyError: if plugin class is already set for the corresponding name.
+    """
+    if plugin_class.__name__ in cls._plugin_classes:
+      raise KeyError((
+          u'Plugin class already set for name: {0:s}.').format(
+              plugin_class.__name__))
+
+    cls._plugin_classes[plugin_class.__name__] = plugin_class
+
+  @classmethod
+  def RegisterPlugins(cls, plugin_classes):
+    """Registers plugin classes.
+
+    Args:
+      plugin_classes: a list of class objects of the plugins.
+
+    Raises:
+      KeyError: if plugin class is already set for the corresponding name.
+    """
+    for plugin_class in plugin_classes:
+      cls.RegisterPlugin(plugin_class)
+
+  @classmethod
+  def RunPlugins(cls, platform, searcher, knowledge_base):
+    """Runs the plugins for a specific platform.
+
+    Args:
+      platform: A string containing the supported operating system
+                of the plugin.
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+ """ + for weight in cls._GetWeights(platform): + for plugin_object in cls._GetPluginsByWeight(platform, weight): + try: + plugin_object.Run(searcher, knowledge_base) + + except (IOError, errors.PreProcessFail) as exception: + logging.warning(( + u'Unable to run preprocessor: {0:s} for attribute: {1:s} ' + u'with error: {2:s}').format( + plugin_object.plugin_name, plugin_object.ATTRIBUTE, + exception)) diff --git a/plaso/preprocessors/manager_test.py b/plaso/preprocessors/manager_test.py new file mode 100644 index 0000000..4fdf638 --- /dev/null +++ b/plaso/preprocessors/manager_test.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the preprocess plugins manager.""" + +import unittest + +from plaso.preprocessors import interface +from plaso.preprocessors import manager + + +class TestPreprocessPlugin(interface.PreprocessPlugin): + """Preprocess test plugin.""" + + def GetValue(self, searcher, unused_knowledge_base): + """Returns the path as found by the searcher. + + Args: + searcher: The file system searcher object (instance of + dfvfs.FileSystemSearcher). + knowledge_base: A knowledge base object (instance of KnowledgeBase), + which contains information from the source data needed + for parsing. + + Returns: + The first path location string. + + Raises: + PreProcessFail: if the path could not be found. + """ + return + + +class PreprocessPluginsManagerTest(unittest.TestCase): + """Tests for the preprocess plugins manager.""" + + def testRegistration(self): + """Tests the RegisterPlugin and DeregisterPlugin functions.""" + # pylint: disable=protected-access + number_of_plugins = len(manager.PreprocessPluginsManager._plugin_classes) + + manager.PreprocessPluginsManager.RegisterPlugin(TestPreprocessPlugin) + self.assertEquals( + len(manager.PreprocessPluginsManager._plugin_classes), + number_of_plugins + 1) + + with self.assertRaises(KeyError): + manager.PreprocessPluginsManager.RegisterPlugin(TestPreprocessPlugin) + + manager.PreprocessPluginsManager.DeregisterPlugin(TestPreprocessPlugin) + self.assertEquals( + len(manager.PreprocessPluginsManager._plugin_classes), + number_of_plugins) + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/preprocessors/test_lib.py b/plaso/preprocessors/test_lib.py new file mode 100644 index 0000000..03ec7a8 --- /dev/null +++ b/plaso/preprocessors/test_lib.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
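A sketch of driving the manager above end to end with a trivial plugin; the plugin class and attribute name are illustrative only, and the searcher is built the same way as in the tests in this commit:

    from dfvfs.helpers import file_system_searcher
    from dfvfs.lib import definitions as dfvfs_definitions
    from dfvfs.path import fake_path_spec
    from dfvfs.resolver import context
    from dfvfs.vfs import fake_file_system

    from plaso.artifacts import knowledge_base
    from plaso.preprocessors import interface
    from plaso.preprocessors import manager


    class ExamplePlugin(interface.PreprocessPlugin):
      """Hypothetical plugin that sets a constant attribute."""

      SUPPORTED_OS = ['Linux']
      WEIGHT = 1
      ATTRIBUTE = 'example'

      def GetValue(self, unused_searcher, unused_knowledge_base):
        return u'example value'


    file_system = fake_file_system.FakeFileSystem(context.Context())
    file_system.AddFileEntry(
        u'/', file_entry_type=dfvfs_definitions.FILE_ENTRY_TYPE_DIRECTORY)
    searcher = file_system_searcher.FileSystemSearcher(
        file_system, fake_path_spec.FakePathSpec(location=u'/'))
    knowledge_base_object = knowledge_base.KnowledgeBase()

    manager.PreprocessPluginsManager.RegisterPlugin(ExamplePlugin)
    # Plugins registered for 'Linux' run grouped by ascending WEIGHT.
    manager.PreprocessPluginsManager.RunPlugins(
        'Linux', searcher, knowledge_base_object)
    print knowledge_base_object.GetValue('example')  # Prints: example value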
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Preprocess plug-in related functions and classes for testing."""
+
+import os
+import unittest
+
+from dfvfs.lib import definitions as dfvfs_definitions
+from dfvfs.resolver import context
+from dfvfs.vfs import fake_file_system
+
+
+class PreprocessPluginTest(unittest.TestCase):
+  """The unit test case for a preprocess plug-in object."""
+
+  _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data')
+
+  def _BuildSingleFileFakeFileSystem(self, path, file_data):
+    """Builds a single file fake file system.
+
+    Args:
+      path: The path of the file.
+      file_data: The data of the file.
+
+    Returns:
+      The fake file system (instance of dfvfs.FakeFileSystem).
+    """
+    resolver_context = context.Context()
+    file_system = fake_file_system.FakeFileSystem(
+        resolver_context)
+
+    file_system.AddFileEntry(
+        u'/', file_entry_type=dfvfs_definitions.FILE_ENTRY_TYPE_DIRECTORY)
+
+    path_segments = path.split(u'/')
+    for segment_index in range(2, len(path_segments)):
+      path_segment = u'{0:s}'.format(
+          u'/'.join(path_segments[:segment_index]))
+      file_system.AddFileEntry(
+          path_segment,
+          file_entry_type=dfvfs_definitions.FILE_ENTRY_TYPE_DIRECTORY)
+
+    file_system.AddFileEntry(path, file_data=file_data)
+
+    return file_system
+
+  def _BuildSingleLinkFakeFileSystem(self, path, linked_path):
+    """Builds a single link fake file system.
+
+    Args:
+      path: The path of the link.
+      linked_path: The path that is linked.
+
+    Returns:
+      The fake file system (instance of dfvfs.FakeFileSystem).
+    """
+    resolver_context = context.Context()
+    file_system = fake_file_system.FakeFileSystem(
+        resolver_context)
+
+    file_system.AddFileEntry(
+        u'/', file_entry_type=dfvfs_definitions.FILE_ENTRY_TYPE_DIRECTORY)
+
+    path_segments = path.split(u'/')
+    for segment_index in range(2, len(path_segments)):
+      path_segment = u'{0:s}'.format(
+          u'/'.join(path_segments[:segment_index]))
+      file_system.AddFileEntry(
+          path_segment,
+          file_entry_type=dfvfs_definitions.FILE_ENTRY_TYPE_DIRECTORY)
+
+    file_system.AddFileEntry(
+        path, file_entry_type=dfvfs_definitions.FILE_ENTRY_TYPE_LINK,
+        link_data=linked_path)
+
+    return file_system
diff --git a/plaso/preprocessors/windows.py b/plaso/preprocessors/windows.py
new file mode 100644
index 0000000..a280424
--- /dev/null
+++ b/plaso/preprocessors/windows.py
@@ -0,0 +1,556 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
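The parent-directory loop in the test helpers above starts at segment index 2 so that the already-added root entry is skipped; a worked example of the paths it generates:

    path = u'/etc/hostname'
    path_segments = path.split(u'/')  # [u'', u'etc', u'hostname']

    for segment_index in range(2, len(path_segments)):
      # Only u'/etc' is generated here; the file itself is added afterwards.
      print u'/'.join(path_segments[:segment_index])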
+"""This file contains preprocessors for Windows."""
+
+import abc
+import logging
+
+from dfvfs.helpers import file_system_searcher
+
+from plaso.lib import errors
+from plaso.preprocessors import interface
+from plaso.preprocessors import manager
+from plaso.winreg import cache
+from plaso.winreg import path_expander as winreg_path_expander
+from plaso.winreg import utils
+from plaso.winreg import winregistry
+
+
+class WindowsRegistryPreprocessPlugin(interface.PreprocessPlugin):
+  """Class that defines the Windows Registry preprocess plugin object.
+
+  By default the Registry plugins need information about the system paths,
+  which prevents them from running at priority 1. In some cases they need
+  to run at priority 3 instead, for instance when the Registry key depends
+  on which version of Windows is running, information that is collected
+  at priority 2.
+  """
+  __abstract = True
+
+  SUPPORTED_OS = ['Windows']
+  WEIGHT = 2
+
+  REG_KEY = '\\'
+  REG_PATH = '{sysregistry}'
+  REG_FILE = 'SOFTWARE'
+
+  def __init__(self):
+    """Initializes the Windows Registry preprocess plugin object."""
+    super(WindowsRegistryPreprocessPlugin, self).__init__()
+    self._file_path_expander = winreg_path_expander.WinRegistryKeyPathExpander()
+    self._key_path_expander = None
+
+  def GetValue(self, searcher, knowledge_base):
+    """Returns a value gathered from a Registry key for preprocessing.
+
+    Args:
+      searcher: The file system searcher object (instance of
+                dfvfs.FileSystemSearcher).
+      knowledge_base: A knowledge base object (instance of KnowledgeBase),
+                      which contains information from the source data needed
+                      for parsing.
+
+    Returns:
+      The value as returned by the plugin's ParseKey method.
+
+    Raises:
+      errors.PreProcessFail: If the preprocessing fails.
+    """
+    # TODO: optimize this in one find.
+    try:
+      # TODO: do not pass the full pre_obj here but just the necessary values.
+      path = self._file_path_expander.ExpandPath(
+          self.REG_PATH, pre_obj=knowledge_base.pre_obj)
+    except KeyError:
+      path = u''
+
+    if not path:
+      raise errors.PreProcessFail(
+          u'Unable to expand path: {0:s}'.format(self.REG_PATH))
+
+    find_spec = file_system_searcher.FindSpec(
+        location=path, case_sensitive=False)
+    path_specs = list(searcher.Find(find_specs=[find_spec]))
+
+    if not path_specs or len(path_specs) != 1:
+      raise errors.PreProcessFail(
+          u'Unable to find directory: {0:s}'.format(self.REG_PATH))
+
+    directory_location = searcher.GetRelativePath(path_specs[0])
+    if not directory_location:
+      raise errors.PreProcessFail(
+          u'Missing directory location for: {0:s}'.format(self.REG_PATH))
+
+    # The path is split into segments to make it path segment separator
+    # independent (and thus platform independent).
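+    # For example, with directory_location '/Windows/System32/config' and
+    # REG_FILE 'SOFTWARE', the find specification below matches
+    # '/Windows/System32/config/SOFTWARE' case insensitively.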
+    path_segments = searcher.SplitPath(directory_location)
+    path_segments.append(self.REG_FILE)
+
+    find_spec = file_system_searcher.FindSpec(
+        location=path_segments, case_sensitive=False)
+    path_specs = list(searcher.Find(find_specs=[find_spec]))
+
+    if not path_specs:
+      raise errors.PreProcessFail(
+          u'Unable to find file: {0:s} in directory: {1:s}'.format(
+              self.REG_FILE, directory_location))
+
+    if len(path_specs) != 1:
+      raise errors.PreProcessFail((
+          u'Find for file: {0:s} in directory: {1:s} returned {2:d} '
+          u'results.').format(
+              self.REG_FILE, directory_location, len(path_specs)))
+
+    file_location = getattr(path_specs[0], 'location', None)
+    if not file_location:
+      raise errors.PreProcessFail(
+          u'Missing file location for: {0:s} in directory: {1:s}'.format(
+              self.REG_FILE, directory_location))
+
+    try:
+      file_entry = searcher.GetFileEntryByPathSpec(path_specs[0])
+    except IOError as exception:
+      raise errors.PreProcessFail(
+          u'Unable to open file entry: {0:s} with error: {1:s}'.format(
+              file_location, exception))
+
+    if not file_entry:
+      raise errors.PreProcessFail(
+          u'Unable to open file entry: {0:s}'.format(file_location))
+
+    # TODO: remove this check; should win_registry.OpenFile fail instead?
+    try:
+      file_object = file_entry.GetFileObject()
+      file_object.close()
+    except IOError as exception:
+      raise errors.PreProcessFail(
+          u'Unable to open file object: {0:s} with error: {1:s}'.format(
+              file_location, exception))
+
+    win_registry = winregistry.WinRegistry(
+        winregistry.WinRegistry.BACKEND_PYREGF)
+
+    try:
+      winreg_file = win_registry.OpenFile(
+          file_entry, codepage=knowledge_base.codepage)
+    except IOError as exception:
+      raise errors.PreProcessFail(
+          u'Unable to open Registry file: {0:s} with error: {1:s}'.format(
+              file_location, exception))
+
+    self.winreg_file = winreg_file
+
+    if not self._key_path_expander:
+      # TODO: it is more efficient to have one cache that is passed to every
+      # plugin, or maybe one path expander. Or replace the path expander by
+      # dfvfs WindowsPathResolver?
+      reg_cache = cache.WinRegistryCache()
+      reg_cache.BuildCache(winreg_file, self.REG_FILE)
+      self._key_path_expander = winreg_path_expander.WinRegistryKeyPathExpander(
+          reg_cache=reg_cache)
+
+    try:
+      # TODO: do not pass the full pre_obj here but just the necessary values.
+      key_path = self._key_path_expander.ExpandPath(
+          self.REG_KEY, pre_obj=knowledge_base.pre_obj)
+    except KeyError:
+      key_path = u''
+
+    if not key_path:
+      raise errors.PreProcessFail(
+          u'Unable to expand path: {0:s}'.format(self.REG_KEY))
+
+    try:
+      key = winreg_file.GetKeyByPath(key_path)
+    except IOError as exception:
+      raise errors.PreProcessFail(
+          u'Unable to fetch Registry key: {0:s} with error: {1:s}'.format(
+              key_path, exception))
+
+    if not key:
+      raise errors.PreProcessFail(
+          u'Registry key {0:s} does not exist.'.format(self.REG_KEY))
+
+    return self.ParseKey(key)
+
+  @abc.abstractmethod
+  def ParseKey(self, key):
+    """Extract information from a Registry key and save in storage."""
+
+
+class WindowsCodepage(WindowsRegistryPreprocessPlugin):
+  """A preprocessing class that fetches codepage information."""
+
+  # Defines the preprocess attribute to be set.
+  ATTRIBUTE = 'code_page'
+
+  # Depend upon the current control set, thus lower the priority.
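+  # The {current_control_set} placeholder in REG_KEY below is expanded
+  # through the WinRegistryCache that GetValue builds from the SYSTEM
+  # Registry file.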
+ WEIGHT = 3 + + REG_KEY = '{current_control_set}\\Control\\Nls\\CodePage' + REG_FILE = 'SYSTEM' + + def ParseKey(self, key): + """Retrieves the codepage or cp1252 by default.""" + value = key.GetValue('ACP') + if value and type(value.data) == unicode: + return u'cp{0:s}'.format(value.data) + + logging.warning( + u'Unable to determine ASCII string codepage, defaulting to cp1252.') + + return u'cp1252' + + +class WindowsHostname(WindowsRegistryPreprocessPlugin): + """A preprocessing class that fetches the hostname information.""" + + ATTRIBUTE = 'hostname' + + # Depend upon the current control set to be found. + WEIGHT = 3 + + REG_KEY = '{current_control_set}\\Control\\ComputerName\\ComputerName' + REG_FILE = 'SYSTEM' + + def ParseKey(self, key): + """Extract the hostname from the registry.""" + value = key.GetValue('ComputerName') + if value and type(value.data) == unicode: + return value.data + + +class WindowsProgramFilesPath(WindowsRegistryPreprocessPlugin): + """Fetch about the location for the Program Files directory.""" + + ATTRIBUTE = 'programfiles' + + REGFILE = 'SOFTWARE' + REG_KEY = '\\Microsoft\\Windows\\CurrentVersion' + + def ParseKey(self, key): + """Extract the version information from the key.""" + value = key.GetValue('ProgramFilesDir') + if value: + # Remove the first drive letter, eg: "C:\Program Files". + return u'{0:s}'.format(value.data.partition('\\')[2]) + + +class WindowsProgramFilesX86Path(WindowsRegistryPreprocessPlugin): + """Fetch about the location for the Program Files directory.""" + + ATTRIBUTE = 'programfilesx86' + + REGFILE = 'SOFTWARE' + REG_KEY = '\\Microsoft\\Windows\\CurrentVersion' + + def ParseKey(self, key): + """Extract the version information from the key.""" + value = key.GetValue(u'ProgramFilesDir (x86)') + if value: + # Remove the first drive letter, eg: "C:\Program Files". + return u'{0:s}'.format(value.data.partition('\\')[2]) + + +class WindowsSystemRegistryPath(interface.PathPreprocessPlugin): + """Get the system registry path.""" + SUPPORTED_OS = ['Windows'] + ATTRIBUTE = 'sysregistry' + PATH = '/(Windows|WinNT|WINNT35|WTSRV)/System32/config' + + +class WindowsSystemRootPath(interface.PathPreprocessPlugin): + """Get the system root path.""" + SUPPORTED_OS = ['Windows'] + ATTRIBUTE = 'systemroot' + PATH = '/(Windows|WinNT|WINNT35|WTSRV)' + + +class WindowsTimeZone(WindowsRegistryPreprocessPlugin): + """A preprocessing class that fetches timezone information.""" + + # Defines the preprocess attribute to be set. + ATTRIBUTE = 'time_zone_str' + + # Depend upon the current control set, thus lower the priority. 
+ WEIGHT = 3 + + REG_KEY = '{current_control_set}\\Control\\TimeZoneInformation' + REG_FILE = 'SYSTEM' + + # transform gathered from these sources: + # Prebuilt from: + # HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ + ZONE_LIST = { + 'IndiaStandardTime': 'Asia/Kolkata', + 'EasternStandardTime': 'EST5EDT', + 'EasternDaylightTime': 'EST5EDT', + 'MountainStandardTime': 'MST7MDT', + 'MountainDaylightTime': 'MST7MDT', + 'PacificStandardTime': 'PST8PDT', + 'PacificDaylightTime': 'PST8PDT', + 'CentralStandardTime': 'CST6CDT', + 'CentralDaylightTime': 'CST6CDT', + 'SamoaStandardTime': 'US/Samoa', + 'HawaiianStandardTime': 'US/Hawaii', + 'AlaskanStandardTime': 'US/Alaska', + 'MexicoStandardTime2': 'MST7MDT', + 'USMountainStandardTime': 'MST7MDT', + 'CanadaCentralStandardTime': 'CST6CDT', + 'MexicoStandardTime': 'CST6CDT', + 'CentralAmericaStandardTime': 'CST6CDT', + 'USEasternStandardTime': 'EST5EDT', + 'SAPacificStandardTime': 'EST5EDT', + 'MalayPeninsulaStandardTime': 'Asia/Kuching', + 'PacificSAStandardTime': 'Canada/Atlantic', + 'AtlanticStandardTime': 'Canada/Atlantic', + 'SAWesternStandardTime': 'Canada/Atlantic', + 'NewfoundlandStandardTime': 'Canada/Newfoundland', + 'AzoresStandardTime': 'Atlantic/Azores', + 'CapeVerdeStandardTime': 'Atlantic/Azores', + 'GMTStandardTime': 'GMT', + 'GreenwichStandardTime': 'GMT', + 'W.CentralAfricaStandardTime': 'Europe/Belgrade', + 'W.EuropeStandardTime': 'Europe/Belgrade', + 'CentralEuropeStandardTime': 'Europe/Belgrade', + 'RomanceStandardTime': 'Europe/Belgrade', + 'CentralEuropeanStandardTime': 'Europe/Belgrade', + 'E.EuropeStandardTime': 'Egypt', + 'SouthAfricaStandardTime': 'Egypt', + 'IsraelStandardTime': 'Egypt', + 'EgyptStandardTime': 'Egypt', + 'NorthAsiaEastStandardTime': 'Asia/Bangkok', + 'SingaporeStandardTime': 'Asia/Bangkok', + 'ChinaStandardTime': 'Asia/Bangkok', + 'W.AustraliaStandardTime': 'Australia/Perth', + 'TaipeiStandardTime': 'Asia/Bangkok', + 'TokyoStandardTime': 'Asia/Tokyo', + 'KoreaStandardTime': 'Asia/Seoul', + '@tzres.dll,-10': 'Atlantic/Azores', + '@tzres.dll,-11': 'Atlantic/Azores', + '@tzres.dll,-12': 'Atlantic/Azores', + '@tzres.dll,-20': 'Atlantic/Cape_Verde', + '@tzres.dll,-21': 'Atlantic/Cape_Verde', + '@tzres.dll,-22': 'Atlantic/Cape_Verde', + '@tzres.dll,-40': 'Brazil/East', + '@tzres.dll,-41': 'Brazil/East', + '@tzres.dll,-42': 'Brazil/East', + '@tzres.dll,-70': 'Canada/Newfoundland', + '@tzres.dll,-71': 'Canada/Newfoundland', + '@tzres.dll,-72': 'Canada/Newfoundland', + '@tzres.dll,-80': 'Canada/Atlantic', + '@tzres.dll,-81': 'Canada/Atlantic', + '@tzres.dll,-82': 'Canada/Atlantic', + '@tzres.dll,-104': 'America/Cuiaba', + '@tzres.dll,-105': 'America/Cuiaba', + '@tzres.dll,-110': 'EST5EDT', + '@tzres.dll,-111': 'EST5EDT', + '@tzres.dll,-112': 'EST5EDT', + '@tzres.dll,-120': 'EST5EDT', + '@tzres.dll,-121': 'EST5EDT', + '@tzres.dll,-122': 'EST5EDT', + '@tzres.dll,-130': 'EST5EDT', + '@tzres.dll,-131': 'EST5EDT', + '@tzres.dll,-132': 'EST5EDT', + '@tzres.dll,-140': 'CST6CDT', + '@tzres.dll,-141': 'CST6CDT', + '@tzres.dll,-142': 'CST6CDT', + '@tzres.dll,-150': 'America/Guatemala', + '@tzres.dll,-151': 'America/Guatemala', + '@tzres.dll,-152': 'America/Guatemala', + '@tzres.dll,-160': 'CST6CDT', + '@tzres.dll,-161': 'CST6CDT', + '@tzres.dll,-162': 'CST6CDT', + '@tzres.dll,-170': 'America/Mexico_City', + '@tzres.dll,-171': 'America/Mexico_City', + '@tzres.dll,-172': 'America/Mexico_City', + '@tzres.dll,-180': 'MST7MDT', + '@tzres.dll,-181': 'MST7MDT', + '@tzres.dll,-182': 'MST7MDT', + 
'@tzres.dll,-190': 'MST7MDT', + '@tzres.dll,-191': 'MST7MDT', + '@tzres.dll,-192': 'MST7MDT', + '@tzres.dll,-200': 'MST7MDT', + '@tzres.dll,-201': 'MST7MDT', + '@tzres.dll,-202': 'MST7MDT', + '@tzres.dll,-210': 'PST8PDT', + '@tzres.dll,-211': 'PST8PDT', + '@tzres.dll,-212': 'PST8PDT', + '@tzres.dll,-220': 'US/Alaska', + '@tzres.dll,-221': 'US/Alaska', + '@tzres.dll,-222': 'US/Alaska', + '@tzres.dll,-230': 'US/Hawaii', + '@tzres.dll,-231': 'US/Hawaii', + '@tzres.dll,-232': 'US/Hawaii', + '@tzres.dll,-260': 'GMT', + '@tzres.dll,-261': 'GMT', + '@tzres.dll,-262': 'GMT', + '@tzres.dll,-271': 'UTC', + '@tzres.dll,-272': 'UTC', + '@tzres.dll,-280': 'Europe/Budapest', + '@tzres.dll,-281': 'Europe/Budapest', + '@tzres.dll,-282': 'Europe/Budapest', + '@tzres.dll,-290': 'Europe/Warsaw', + '@tzres.dll,-291': 'Europe/Warsaw', + '@tzres.dll,-292': 'Europe/Warsaw', + '@tzres.dll,-331': 'Europe/Nicosia', + '@tzres.dll,-332': 'Europe/Nicosia', + '@tzres.dll,-340': 'Africa/Cairo', + '@tzres.dll,-341': 'Africa/Cairo', + '@tzres.dll,-342': 'Africa/Cairo', + '@tzres.dll,-350': 'Europe/Sofia', + '@tzres.dll,-351': 'Europe/Sofia', + '@tzres.dll,-352': 'Europe/Sofia', + '@tzres.dll,-365': 'Egypt', + '@tzres.dll,-390': 'Asia/Kuwait', + '@tzres.dll,-391': 'Asia/Kuwait', + '@tzres.dll,-392': 'Asia/Kuwait', + '@tzres.dll,-400': 'Asia/Baghdad', + '@tzres.dll,-401': 'Asia/Baghdad', + '@tzres.dll,-402': 'Asia/Baghdad', + '@tzres.dll,-410': 'Africa/Nairobi', + '@tzres.dll,-411': 'Africa/Nairobi', + '@tzres.dll,-412': 'Africa/Nairobi', + '@tzres.dll,-434': 'Asia/Tbilisi', + '@tzres.dll,-435': 'Asia/Tbilisi', + '@tzres.dll,-440': 'Asia/Muscat', + '@tzres.dll,-441': 'Asia/Muscat', + '@tzres.dll,-442': 'Asia/Muscat', + '@tzres.dll,-447': 'Asia/Baku', + '@tzres.dll,-448': 'Asia/Baku', + '@tzres.dll,-449': 'Asia/Baku', + '@tzres.dll,-450': 'Asia/Yerevan', + '@tzres.dll,-451': 'Asia/Yerevan', + '@tzres.dll,-452': 'Asia/Yerevan', + '@tzres.dll,-460': 'Asia/Kabul', + '@tzres.dll,-461': 'Asia/Kabul', + '@tzres.dll,-462': 'Asia/Kabul', + '@tzres.dll,-471': 'Asia/Yekaterinburg', + '@tzres.dll,-472': 'Asia/Yekaterinburg', + '@tzres.dll,-480': 'Asia/Karachi', + '@tzres.dll,-481': 'Asia/Karachi', + '@tzres.dll,-482': 'Asia/Karachi', + '@tzres.dll,-490': 'Asia/Kolkata', + '@tzres.dll,-491': 'Asia/Kolkata', + '@tzres.dll,-492': 'Asia/Kolkata', + '@tzres.dll,-500': 'Asia/Kathmandu', + '@tzres.dll,-501': 'Asia/Kathmandu', + '@tzres.dll,-502': 'Asia/Kathmandu', + '@tzres.dll,-510': 'Asia/Dhaka', + '@tzres.dll,-511': 'Asia/Aqtau', + '@tzres.dll,-512': 'Asia/Aqtau', + '@tzres.dll,-570': 'Asia/Chongqing', + '@tzres.dll,-571': 'Asia/Chongqing', + '@tzres.dll,-572': 'Asia/Chongqing', + '@tzres.dll,-650': 'Australia/Darwin', + '@tzres.dll,-651': 'Australia/Darwin', + '@tzres.dll,-652': 'Australia/Darwin', + '@tzres.dll,-660': 'Australia/Adelaide', + '@tzres.dll,-661': 'Australia/Adelaide', + '@tzres.dll,-662': 'Australia/Adelaide', + '@tzres.dll,-670': 'Australia/Sydney', + '@tzres.dll,-671': 'Australia/Sydney', + '@tzres.dll,-672': 'Australia/Sydney', + '@tzres.dll,-680': 'Australia/Brisbane', + '@tzres.dll,-681': 'Australia/Brisbane', + '@tzres.dll,-682': 'Australia/Brisbane', + '@tzres.dll,-721': 'Pacific/Port_Moresby', + '@tzres.dll,-722': 'Pacific/Port_Moresby', + '@tzres.dll,-731': 'Pacific/Fiji', + '@tzres.dll,-732': 'Pacific/Fiji', + '@tzres.dll,-840': 'America/Argentina/Buenos_Aires', + '@tzres.dll,-841': 'America/Argentina/Buenos_Aires', + '@tzres.dll,-842': 'America/Argentina/Buenos_Aires', + '@tzres.dll,-880': 'UTC', + 
'@tzres.dll,-930': 'UTC', + '@tzres.dll,-931': 'UTC', + '@tzres.dll,-932': 'UTC', + '@tzres.dll,-1010': 'Asia/Aqtau', + '@tzres.dll,-1020': 'Asia/Dhaka', + '@tzres.dll,-1021': 'Asia/Dhaka', + '@tzres.dll,-1022': 'Asia/Dhaka', + '@tzres.dll,-1070': 'Asia/Tbilisi', + '@tzres.dll,-1120': 'America/Cuiaba', + '@tzres.dll,-1140': 'Pacific/Fiji', + '@tzres.dll,-1460': 'Pacific/Port_Moresby', + '@tzres.dll,-1530': 'Asia/Yekaterinburg', + '@tzres.dll,-1630': 'Europe/Nicosia', + '@tzres.dll,-1660': 'America/Bahia', + '@tzres.dll,-1661': 'America/Bahia', + '@tzres.dll,-1662': 'America/Bahia', + 'Central Standard Time': 'CST6CDT', + 'Pacific Standard Time': 'PST8PDT', + } + + def ParseKey(self, key): + """Extract timezone information from the registry.""" + value = key.GetValue('StandardName') + if value and type(value.data) == unicode: + # Do a mapping to a value defined as in the Olson database. + return self.ZONE_LIST.get(value.data.replace(' ', ''), value.data) + + +class WindowsUsers(WindowsRegistryPreprocessPlugin): + """Fetch information about user profiles.""" + + ATTRIBUTE = 'users' + + REG_FILE = 'SOFTWARE' + REG_KEY = '\\Microsoft\\Windows NT\\CurrentVersion\\ProfileList' + + def ParseKey(self, key): + """Extract current control set information.""" + users = [] + + for sid in key.GetSubkeys(): + # TODO: as part of artifacts, create a proper object for this. + user = {} + user['sid'] = sid.name + value = sid.GetValue('ProfileImagePath') + if value: + user['path'] = value.data + user['name'] = utils.WinRegBasename(user['path']) + + users.append(user) + + return users + + +class WindowsVersion(WindowsRegistryPreprocessPlugin): + """Fetch information about the current Windows version.""" + + ATTRIBUTE = 'osversion' + + REGFILE = 'SOFTWARE' + REG_KEY = '\\Microsoft\\Windows NT\\CurrentVersion' + + def ParseKey(self, key): + """Extract the version information from the key.""" + value = key.GetValue('ProductName') + if value: + return u'{0:s}'.format(value.data) + + +class WindowsWinDirPath(interface.PathPreprocessPlugin): + """Get the system path.""" + SUPPORTED_OS = ['Windows'] + ATTRIBUTE = 'windir' + PATH = '/(Windows|WinNT|WINNT35|WTSRV)' + + +manager.PreprocessPluginsManager.RegisterPlugins([ + WindowsCodepage, WindowsHostname, WindowsProgramFilesPath, + WindowsProgramFilesX86Path, WindowsSystemRegistryPath, + WindowsSystemRootPath, WindowsTimeZone, WindowsUsers, WindowsVersion, + WindowsWinDirPath]) diff --git a/plaso/preprocessors/windows_test.py b/plaso/preprocessors/windows_test.py new file mode 100644 index 0000000..c69bcdf --- /dev/null +++ b/plaso/preprocessors/windows_test.py @@ -0,0 +1,265 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
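The mapping above is keyed on space-stripped names, which is why WindowsTimeZone.ParseKey removes spaces before the lookup and falls back to the raw value for unmapped names; for example:

    from plaso.preprocessors import windows

    value = u'Pacific Standard Time'

    # The space-stripped lookup hits the 'PacificStandardTime' entry; names
    # that are not in the table fall through unchanged.
    zone = windows.WindowsTimeZone.ZONE_LIST.get(value.replace(' ', ''), value)
    assert zone == 'PST8PDT'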
+"""Tests for the Windows preprocess plug-ins.""" + +import os +import unittest + +from dfvfs.helpers import file_system_searcher +from dfvfs.path import fake_path_spec + +from plaso.artifacts import knowledge_base +from plaso.preprocessors import windows +from plaso.preprocessors import test_lib + + +class WindowsSoftwareRegistryTest(test_lib.PreprocessPluginTest): + """Base class for tests that use the SOFTWARE Registry file.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + file_object = open(os.path.join( + self._TEST_DATA_PATH, u'SYSTEM'), 'rb') + file_data = file_object.read() + file_object.close() + + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/Windows/System32/config/SYSTEM', file_data) + + file_object = open(os.path.join( + self._TEST_DATA_PATH, u'SOFTWARE'), 'rb') + file_data = file_object.read() + file_object.close() + + self._fake_file_system.AddFileEntry( + u'/Windows/System32/config/SOFTWARE', file_data=file_data) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + +class WindowsSystemRegistryTest(test_lib.PreprocessPluginTest): + """Base class for tests that use the SYSTEM Registry file.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + file_object = open(os.path.join( + self._TEST_DATA_PATH, u'SYSTEM'), 'rb') + file_data = file_object.read() + file_object.close() + + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/Windows/System32/config/SYSTEM', file_data) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + +class WindowsCodepageTest(WindowsSystemRegistryTest): + """Tests for the Windows codepage preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. + plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsCodepage() + plugin.Run(self._searcher, knowledge_base_object) + + self.assertEquals(knowledge_base_object.codepage, u'cp1252') + + +class WindowsHostnameTest(WindowsSystemRegistryTest): + """Tests for the Windows hostname preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. + plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsHostname() + plugin.Run(self._searcher, knowledge_base_object) + + self.assertEquals(knowledge_base_object.hostname, u'WKS-WIN732BITA') + + +class WindowsProgramFilesPath(WindowsSoftwareRegistryTest): + """Tests for the Windows Program Files path preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. 
+ plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsProgramFilesPath() + plugin.Run(self._searcher, knowledge_base_object) + + path = knowledge_base_object.GetValue('programfiles') + self.assertEquals(path, u'Program Files') + + +class WindowsProgramFilesX86Path(WindowsSoftwareRegistryTest): + """Tests for the Windows Program Files X86 path preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. + plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsProgramFilesX86Path() + + plugin.Run(self._searcher, knowledge_base_object) + + path = knowledge_base_object.GetValue('programfilesx86') + # The test SOFTWARE Registry file does not contain a value for + # the Program Files X86 path. + self.assertEquals(path, None) + + +class WindowsSystemRegistryPathTest(test_lib.PreprocessPluginTest): + """Tests for the Windows system Registry path preprocess plug-in object.""" + + _FILE_DATA = 'regf' + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/Windows/System32/config/SYSTEM', self._FILE_DATA) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + path = knowledge_base_object.GetValue('sysregistry') + self.assertEquals(path, u'/Windows/System32/config') + + +class WindowsSystemRootPathTest(test_lib.PreprocessPluginTest): + """Tests for the Windows system Root path preprocess plug-in object.""" + + _FILE_DATA = 'regf' + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._fake_file_system = self._BuildSingleFileFakeFileSystem( + u'/Windows/System32/config/SYSTEM', self._FILE_DATA) + + mount_point = fake_path_spec.FakePathSpec(location=u'/') + self._searcher = file_system_searcher.FileSystemSearcher( + self._fake_file_system, mount_point) + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + plugin = windows.WindowsSystemRootPath() + plugin.Run(self._searcher, knowledge_base_object) + + path = knowledge_base_object.GetValue('systemroot') + self.assertEquals(path, u'/Windows') + + +class WindowsTimeZoneTest(WindowsSystemRegistryTest): + """Tests for the Windows timezone preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. 
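+    # Without it the {sysregistry} expansion raises a KeyError, leaving an
+    # empty path, and GetValue() fails with PreProcessFail.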
+ plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsTimeZone() + plugin.Run(self._searcher, knowledge_base_object) + + time_zone_str = knowledge_base_object.GetValue('time_zone_str') + self.assertEquals(time_zone_str, u'EST5EDT') + + +class WindowsUsersTest(WindowsSoftwareRegistryTest): + """Tests for the Windows username preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. + plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsUsers() + plugin.Run(self._searcher, knowledge_base_object) + + users = knowledge_base_object.GetValue('users') + self.assertEquals(len(users), 11) + + expected_sid = u'S-1-5-21-2036804247-3058324640-2116585241-1114' + self.assertEquals(users[9].get('sid', None), expected_sid) + self.assertEquals(users[9].get('name', None), u'rsydow') + self.assertEquals(users[9].get('path', None), u'C:\\Users\\rsydow') + + +class WindowsVersionTest(WindowsSoftwareRegistryTest): + """Tests for the Windows version preprocess plug-in object.""" + + def testGetValue(self): + """Tests the GetValue function.""" + knowledge_base_object = knowledge_base.KnowledgeBase() + + # The plug-in needs to expand {sysregistry} so we need to run + # the WindowsSystemRegistryPath plug-in first. + plugin = windows.WindowsSystemRegistryPath() + plugin.Run(self._searcher, knowledge_base_object) + + plugin = windows.WindowsVersion() + plugin.Run(self._searcher, knowledge_base_object) + + osversion = knowledge_base_object.GetValue('osversion') + self.assertEquals(osversion, u'Windows 7 Ultimate') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/proto/__init__.py b/plaso/proto/__init__.py new file mode 100644 index 0000000..1f5c4b3 --- /dev/null +++ b/plaso/proto/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/proto/plaso_storage.proto b/plaso/proto/plaso_storage.proto new file mode 100644 index 0000000..b0c2798 --- /dev/null +++ b/plaso/proto/plaso_storage.proto @@ -0,0 +1,367 @@ +// Copyright 2012 The Plaso Project Authors. +// Please see the AUTHORS file for details on individual authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Kristinn Gudjonsson
+
+// This is the main protobuf for event storage in plaso.
+
+syntax = "proto2";
+
+package plaso_storage;
+
+// Each EventObject can contain any attribute
+// as long as it can be expressed in any of the supported
+// formats (e.g. string, int, array, dict).
+// This can be viewed as a hash or dict object, with a key
+// and a value.
+message Attribute {
+  // The key to the dict object, something like 'username'.
+  required string key = 1;
+
+  // If the value is a string.
+  optional string string = 2;
+  // If the value is an integer.
+  optional int64 integer = 3;
+  // If the value is an array.
+  optional Array array = 4;
+  // If the value is a dictionary.
+  optional Dict dict = 5;
+  // If the value is a boolean.
+  optional bool boolean = 6;
+  // If we have a "raw" byte value.
+  optional bytes data = 7;
+  // If we have a "float" value.
+  optional float float = 8;
+  // If there is a None value (happens).
+  optional bool none = 9;
+};
+
+// A list of Attributes, to build up a dictionary.
+message Dict {
+  repeated Attribute attributes = 1;
+};
+
+// A value, used for lists or arrays.
+message Value {
+  optional int64 integer = 1;
+  optional string string = 2;
+  optional bytes data = 3;
+  optional Array array = 4;
+  optional Dict dict = 5;
+  optional bool boolean = 6;
+  optional float float = 7;
+  optional bool none = 8;
+};
+
+// A list of values, either integers or strings, to make up an array.
+message Array {
+  repeated Value values = 1;
+};
+
+// Each event read by the tool is stored as an EventObject;
+// an EventObject contains a certain fixed set of attributes
+// and can also store any additional attributes.
+// This message stores the main attributes inside each record
+// instead of nesting them arbitrarily deep.
+message EventObject {
+  // The timestamp is presented as a 64-bit Unix epoch time,
+  // stored in UTC.
+  optional int64 timestamp = 1;
+
+  // A short description of the meaning of the timestamp.
+  // This could be something like 'File Written', 'Last Written',
+  // 'Page Visited' or 'File Downloaded'.
+  optional string timestamp_desc = 2;
+
+  // The type of the event data stored in the attributes.
+  required string data_type = 3;
+
+  // A list of all the stored attributes within the event.
+  repeated Attribute attributes = 4;
+
+  // The timezone of the source the timestamp came from.
+  optional string timezone = 5;
+
+  // The filename from which the event was extracted.
+  optional string filename = 6;
+  optional string display_name = 7;
+
+  // The full PathSpec of the file the event was extracted from.
+  optional bytes pathspec = 8;
+
+  // The offset into the original file from which the event came.
+  optional int64 offset = 9;
+
+  // Information about where this object is stored, added by the storage
+  // library to make it easier to quickly recover the EventObject from
+  // the storage file.
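+  // The same (store_number, store_index) pair is used by the EventTagging
+  // and EventGroup messages below to point back at a specific EventObject
+  // inside the storage file.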
+  optional int64 store_number = 10;
+  optional int64 store_index = 11;
+
+  // EventTagging is a message that can be added to include information
+  // about coloring, tagging or comments on this object.
+  optional EventTagging tag = 12;
+
+  // Description of the origin of the event in generic terms that
+  // mostly adhere to the definition of the TLN format.
+  // TODO: Remove this field from the EventObject message.
+  enum SourceShort {
+    AV = 1;       // All Anti-Virus engine log files.
+    BACK = 2;     // Information from backup points, e.g. restore points, VSS.
+    EVT = 3;      // EventLog entries, both the EVT format and EVTX.
+    EXIF = 4;     // EXIF information.
+    FILE = 5;     // FILE related information, mactime information.
+    LOG = 6;      // Generic log file, most log files should fit this.
+    LNK = 7;      // Shortcut or link files.
+    LSO = 8;      // Flash cookies, or Local Shared Objects.
+    META = 9;     // Metadata information.
+    PLIST = 10;   // Information extracted from plist files.
+    RAM = 11;     // Information extracted from RAM.
+    RECBIN = 12;  // Recycle bin or deleted items.
+    REG = 13;     // Registry information.
+    WEBHIST = 14; // Browser history.
+    TORRENT = 15; // Torrent files.
+    JOB = 16;     // Scheduled tasks or jobs.
+  }
+
+  // The category or short description of the source.
+  // TODO: Remove this field from the EventObject message.
+  optional SourceShort source_short = 13;
+
+  // The short description is not sufficient to describe the source
+  // of the event. A longer source field is therefore provided to add
+  // more context to the source. The source_long field should not be
+  // long; two or three words should be sufficient for most cases.
+  // The field is not strictly defined; it should just be short and
+  // fully descriptive.
+  //
+  // Example values are:
+  //   Chrome Browser History
+  //   Chrome Download History
+  //   Recycle Bin
+  //   NTUSER.DAT Registry
+  //   Sophos AV Log
+  // TODO: Remove this field from the EventObject message.
+  optional string source_long = 14;
+
+  /////////////////////////////////////////////////////////////////////
+  // Include common attribute names to flatten out the storage.
+  /////////////////////////////////////////////////////////////////////
+
+  // The name of the parser used to extract this item.
+  optional string parser = 15;
+  // The integer value of the inode of the file this entry is extracted from.
+  optional int64 inode = 16;
+  // The extracted hostname this entry came from.
+  optional string hostname = 17;
+  // The name of the plugin that was used, if applicable.
+  optional string plugin = 18;
+  // For Windows Registry files, defines the type of registry, e.g. NTUSER, SAM.
+  optional string registry_type = 19;
+  // Boolean value that indicates whether the file was allocated or not.
+  optional bool allocated = 20;
+  // For filesystem records, defines the type of filesystem, e.g. NTFS, FAT.
+  optional string fs_type = 21;
+  // Many parsers attempt to recover partially deleted entries; this boolean
+  // value is present in those parsers and indicates whether this particular
+  // entry is recovered or not.
+  optional bool recovered = 22;
+  // Contains the record number in log files that contain sequential
+  // information, such as Windows EventLog.
+  optional int64 record_number = 23;
+  // If the file being parsed contains different sources, such as "Security" or
+  // other similar source types, it can be stored here.
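+  // A Windows EventLog file, for example, can carry records from several
+  // sources; the name of the source a given record came from, such as
+  // "Security", would be stored here.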
+  optional string source_name = 24;
+  // Some log files, such as the EventLog, store information about the
+  // computer from which this particular entry came. Often used in log
+  // files that can consolidate entries from more than a single host.
+  optional string computer_name = 25;
+  // A few fields that are specific to Windows EventLog entries, common
+  // enough to get specially defined attributes.
+  optional int64 event_identifier = 26;
+  optional int64 event_level = 27;
+  optional string xml_string = 28;
+  optional Array strings = 29;
+  // Some files contain information about the username that produced the
+  // extracted record.
+  optional string username = 30;
+  // Sometimes the username is not available, but a SID or a UID is.
+  optional string user_sid = 31;
+  // A field indicating the size of a cache file.
+  optional int64 cached_file_size = 32;
+  // Mostly used in browser history plugins, referencing the number of
+  // times someone visited that particular entry.
+  optional int64 number_of_hits = 33;
+  // Used in MSIECF to indicate the index name of the cache directory.
+  optional int64 cache_directory_index = 34;
+  // Mostly used in browser history plugins. Contains the title of the
+  // visited web page (as stored in its title tag).
+  optional string title = 35;
+  // Several parsers extract metadata items from events and store them
+  // in a dictionary.
+  optional Dict metadata = 36;
+  // A URL extracted from things like browser history.
+  optional string url = 37;
+  // Windows registry keyname attribute.
+  optional string keyname = 38;
+  // Extracted values from a Windows registry key.
+  optional Dict regvalue = 39;
+  // Some text-based parsers define this attribute for their text
+  // representation.
+  optional string text = 40;
+
+  // The UUID is a hex string that uniquely identifies the EventObject.
+  optional string uuid = 41;
+};
+
+// The EventTagging is a simple message that describes comments,
+// color information or tagging of EventObjects. This information
+// is usually manually added by an investigator and can be used
+// to add more context to certain events.
+message EventTagging {
+  // Description of where the EventObject that this tag describes
+  // is stored. It is necessary to either define these two values
+  // or the event_uuid, otherwise it will not be possible to locate
+  // the event object this tag belongs to.
+  optional int64 store_number = 1;
+  optional int64 store_index = 2;
+
+  // An arbitrary string that contains a comment describing
+  // observations the investigator has about this EventObject.
+  optional string comment = 3;
+
+  // Color information, used in some front-ends to make this
+  // event stand out. This should be either a simple description
+  // of the color, e.g. "red", "yellow", etc., or an HTML color
+  // code, e.g. "#E11414".
+  optional string color = 4;
+
+  // A short string or tag that can be used to group together events
+  // that are related to one another, e.g. "Malware", "Entry Point",
+  // "Event of Interest", etc.
+  message Tag {
+    required string value = 1;
+  };
+
+  repeated Tag tags = 5;
+
+  // A UUID value of the particular event object that this tag
+  // belongs to. This value has to be set if the store_number and
+  // store_index are not known at the time of tagging.
+  optional string event_uuid = 6;
+};
+
+// The EventGroup is a simple mechanism to describe a group of
+// events that belong to the same action or behavior.
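+// For example (illustrative only), the handful of Registry and log
+// events produced by plugging in a USB drive could be collected under
+// a single "USB Drive inserted" group.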
+// Storing this information lets a front-end
+// collapse all these events into a single source.
+message EventGroup {
+  // The name of the EventGroup, what is displayed in the front-end
+  // as a substitute for the other events, should be descriptive
+  // of the events that are grouped together, as in "USB Drive inserted"
+  // or "User Logged On".
+  required string name = 1;
+
+  // Optional longer description of the group, giving a more detailed
+  // description of what the grouping describes or why these events
+  // were grouped together.
+  optional string description = 2;
+
+  // If these events contain a timestamp it can be beneficial to
+  // include the time range of events this group spans. That time range
+  // can be described by the first and last timestamps contained
+  // within the group.
+  optional int64 first_timestamp = 3;
+  optional int64 last_timestamp = 4;
+
+  // Optional color information that can be used in the front-end
+  // to color the group. This can be described as a simple color,
+  // e.g. "red", "orange", "green", or as an HTML color code,
+  // e.g. "#E11414".
+  optional string color = 5;
+
+  // If this group of events falls into a specific category it can
+  // be included here, e.g. "User Behavior", "Malware Related", etc.
+  optional string category = 6;
+
+  // Information about which EventObjects are included in this group.
+  // The information is stored in an attribute called EventDescription
+  // that simply defines where the EventObjects are stored so they can
+  // be easily identified and recovered.
+  message EventDescription {
+    // Description of where these events are stored within the storage
+    // file.
+    required int64 store_number = 1;
+    required int64 store_index = 2;
+  };
+
+  repeated EventDescription events = 7;
+};
+
+// The PreProcess protobuf is a simple message that stores information
+// gathered from the preprocessing stage of plaso.
+message PreProcess {
+  // Storing information about the runtime of the tool.
+  optional Dict collection_information = 1;
+
+  // A dict that contains information about counters stored within
+  // the store.
+  optional Dict counter = 2;
+
+  // A list value that depicts the range of store numbers this particular
+  // PreProcess message applies to.
+  optional Array store_range = 3;
+
+  // All attributes that each preprocessing module produces get stored
+  // inside this field.
+  repeated Attribute attributes = 4;
+
+  // A dict that contains information about plugin counters.
+  optional Dict plugin_counter = 5;
+};
+
+// The AnalysisReport object is a simple message describing a report
+// created from an analysis plugin.
+message AnalysisReport {
+  // Name of the analysis plugin that created this report.
+  optional string plugin_name = 1;
+  // The timestamp of when this report was created.
+  optional int64 time_compiled = 2;
+
+  // The actual report content: the free-flowing text.
+  // The text may contain a few special notations:
+  //   {image:X} - where X is an integer, indicating the entry number
+  //   inside the images field (counter starting from zero).
+  //   This indicates where images should be included in the
+  //   final displayed report.
+  //   {heading_start} / {heading_end}: An indication of a main header.
+  //   {subheading_start} / {subheading_end}: An indication of a subheader.
+  // This is in no way meant as an "HTML/XML look-alike" in terms of
+  // definitions.
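+  //
+  // An illustrative (hypothetical) report text using these notations:
+  //   {heading_start}Browser usage{heading_end}
+  //   The most visited page is shown below. {image:0}
+  //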
+ // This is merely a very simple implementation that only contains these + // "special" tags, meant to make it easier to export the final report in a + // HTML or any other format for later viewing. + optional string text = 3; + + // Optional repeated field of images that can be saved as binary data. + repeated bytes images = 4; + + // Some reports may contain counters, or some statistics that can be + // retrieved later on for additional analysis or processing. + optional Dict report_dict = 5; + optional Array report_array = 6; + + // If a filter string was used on the output, it's saved here. + optional string filter_string = 7; +}; diff --git a/plaso/proto/plaso_storage_pb2.py b/plaso/proto/plaso_storage_pb2.py new file mode 100644 index 0000000..a18267c --- /dev/null +++ b/plaso/proto/plaso_storage_pb2.py @@ -0,0 +1,1041 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! + +from google.protobuf import descriptor +from google.protobuf import message +from google.protobuf import reflection +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + + + +DESCRIPTOR = descriptor.FileDescriptor( + name='plaso/proto/plaso_storage.proto', + package='plaso_storage', + serialized_pb='\n\x1fplaso/proto/plaso_storage.proto\x12\rplaso_storage\"\xbd\x01\n\tAttribute\x12\x0b\n\x03key\x18\x01 \x02(\t\x12\x0e\n\x06string\x18\x02 \x01(\t\x12\x0f\n\x07integer\x18\x03 \x01(\x03\x12#\n\x05\x61rray\x18\x04 \x01(\x0b\x32\x14.plaso_storage.Array\x12!\n\x04\x64ict\x18\x05 \x01(\x0b\x32\x13.plaso_storage.Dict\x12\x0f\n\x07\x62oolean\x18\x06 \x01(\x08\x12\x0c\n\x04\x64\x61ta\x18\x07 \x01(\x0c\x12\r\n\x05\x66loat\x18\x08 \x01(\x02\x12\x0c\n\x04none\x18\t \x01(\x08\"4\n\x04\x44ict\x12,\n\nattributes\x18\x01 \x03(\x0b\x32\x18.plaso_storage.Attribute\"\xac\x01\n\x05Value\x12\x0f\n\x07integer\x18\x01 \x01(\x03\x12\x0e\n\x06string\x18\x02 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12#\n\x05\x61rray\x18\x04 \x01(\x0b\x32\x14.plaso_storage.Array\x12!\n\x04\x64ict\x18\x05 \x01(\x0b\x32\x13.plaso_storage.Dict\x12\x0f\n\x07\x62oolean\x18\x06 \x01(\x08\x12\r\n\x05\x66loat\x18\x07 \x01(\x02\x12\x0c\n\x04none\x18\x08 \x01(\x08\"-\n\x05\x41rray\x12$\n\x06values\x18\x01 \x03(\x0b\x32\x14.plaso_storage.Value\"\xf5\x08\n\x0b\x45ventObject\x12\x11\n\ttimestamp\x18\x01 \x01(\x03\x12\x16\n\x0etimestamp_desc\x18\x02 \x01(\t\x12\x11\n\tdata_type\x18\x03 \x02(\t\x12,\n\nattributes\x18\x04 \x03(\x0b\x32\x18.plaso_storage.Attribute\x12\x10\n\x08timezone\x18\x05 \x01(\t\x12\x10\n\x08\x66ilename\x18\x06 \x01(\t\x12\x14\n\x0c\x64isplay_name\x18\x07 \x01(\t\x12\x10\n\x08pathspec\x18\x08 \x01(\x0c\x12\x0e\n\x06offset\x18\t \x01(\x03\x12\x14\n\x0cstore_number\x18\n \x01(\x03\x12\x13\n\x0bstore_index\x18\x0b \x01(\x03\x12(\n\x03tag\x18\x0c \x01(\x0b\x32\x1b.plaso_storage.EventTagging\x12<\n\x0csource_short\x18\r \x01(\x0e\x32&.plaso_storage.EventObject.SourceShort\x12\x13\n\x0bsource_long\x18\x0e \x01(\t\x12\x0e\n\x06parser\x18\x0f \x01(\t\x12\r\n\x05inode\x18\x10 \x01(\x03\x12\x10\n\x08hostname\x18\x11 \x01(\t\x12\x0e\n\x06plugin\x18\x12 \x01(\t\x12\x15\n\rregistry_type\x18\x13 \x01(\t\x12\x11\n\tallocated\x18\x14 \x01(\x08\x12\x0f\n\x07\x66s_type\x18\x15 \x01(\t\x12\x11\n\trecovered\x18\x16 \x01(\x08\x12\x15\n\rrecord_number\x18\x17 \x01(\x03\x12\x13\n\x0bsource_name\x18\x18 \x01(\t\x12\x15\n\rcomputer_name\x18\x19 \x01(\t\x12\x18\n\x10\x65vent_identifier\x18\x1a \x01(\x03\x12\x13\n\x0b\x65vent_level\x18\x1b \x01(\x03\x12\x12\n\nxml_string\x18\x1c \x01(\t\x12%\n\x07strings\x18\x1d 
\x01(\x0b\x32\x14.plaso_storage.Array\x12\x10\n\x08username\x18\x1e \x01(\t\x12\x10\n\x08user_sid\x18\x1f \x01(\t\x12\x18\n\x10\x63\x61\x63hed_file_size\x18 \x01(\x03\x12\x16\n\x0enumber_of_hits\x18! \x01(\x03\x12\x1d\n\x15\x63\x61\x63he_directory_index\x18\" \x01(\x03\x12\r\n\x05title\x18# \x01(\t\x12%\n\x08metadata\x18$ \x01(\x0b\x32\x13.plaso_storage.Dict\x12\x0b\n\x03url\x18% \x01(\t\x12\x0f\n\x07keyname\x18& \x01(\t\x12%\n\x08regvalue\x18\' \x01(\x0b\x32\x13.plaso_storage.Dict\x12\x0c\n\x04text\x18( \x01(\t\x12\x0c\n\x04uuid\x18) \x01(\t\"\xad\x01\n\x0bSourceShort\x12\x06\n\x02\x41V\x10\x01\x12\x08\n\x04\x42\x41\x43K\x10\x02\x12\x07\n\x03\x45VT\x10\x03\x12\x08\n\x04\x45XIF\x10\x04\x12\x08\n\x04\x46ILE\x10\x05\x12\x07\n\x03LOG\x10\x06\x12\x07\n\x03LNK\x10\x07\x12\x07\n\x03LSO\x10\x08\x12\x08\n\x04META\x10\t\x12\t\n\x05PLIST\x10\n\x12\x07\n\x03RAM\x10\x0b\x12\n\n\x06RECBIN\x10\x0c\x12\x07\n\x03REG\x10\r\x12\x0b\n\x07WEBHIST\x10\x0e\x12\x0b\n\x07TORRENT\x10\x0f\x12\x07\n\x03JOB\x10\x10\"\xb2\x01\n\x0c\x45ventTagging\x12\x14\n\x0cstore_number\x18\x01 \x01(\x03\x12\x13\n\x0bstore_index\x18\x02 \x01(\x03\x12\x0f\n\x07\x63omment\x18\x03 \x01(\t\x12\r\n\x05\x63olor\x18\x04 \x01(\t\x12-\n\x04tags\x18\x05 \x03(\x0b\x32\x1f.plaso_storage.EventTagging.Tag\x12\x12\n\nevent_uuid\x18\x06 \x01(\t\x1a\x14\n\x03Tag\x12\r\n\x05value\x18\x01 \x02(\t\"\xfc\x01\n\nEventGroup\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x17\n\x0f\x66irst_timestamp\x18\x03 \x01(\x03\x12\x16\n\x0elast_timestamp\x18\x04 \x01(\x03\x12\r\n\x05\x63olor\x18\x05 \x01(\t\x12\x10\n\x08\x63\x61tegory\x18\x06 \x01(\t\x12:\n\x06\x65vents\x18\x07 \x03(\x0b\x32*.plaso_storage.EventGroup.EventDescription\x1a=\n\x10\x45ventDescription\x12\x14\n\x0cstore_number\x18\x01 \x02(\x03\x12\x13\n\x0bstore_index\x18\x02 \x02(\x03\"\xed\x01\n\nPreProcess\x12\x33\n\x16\x63ollection_information\x18\x01 \x01(\x0b\x32\x13.plaso_storage.Dict\x12$\n\x07\x63ounter\x18\x02 \x01(\x0b\x32\x13.plaso_storage.Dict\x12)\n\x0bstore_range\x18\x03 \x01(\x0b\x32\x14.plaso_storage.Array\x12,\n\nattributes\x18\x04 \x03(\x0b\x32\x18.plaso_storage.Attribute\x12+\n\x0eplugin_counter\x18\x05 \x01(\x0b\x32\x13.plaso_storage.Dict\"\xc7\x01\n\x0e\x41nalysisReport\x12\x13\n\x0bplugin_name\x18\x01 \x01(\t\x12\x15\n\rtime_compiled\x18\x02 \x01(\x03\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0e\n\x06images\x18\x04 \x03(\x0c\x12(\n\x0breport_dict\x18\x05 \x01(\x0b\x32\x13.plaso_storage.Dict\x12*\n\x0creport_array\x18\x06 \x01(\x0b\x32\x14.plaso_storage.Array\x12\x15\n\rfilter_string\x18\x07 \x01(\t') + + + +_EVENTOBJECT_SOURCESHORT = descriptor.EnumDescriptor( + name='SourceShort', + full_name='plaso_storage.EventObject.SourceShort', + filename=None, + file=DESCRIPTOR, + values=[ + descriptor.EnumValueDescriptor( + name='AV', index=0, number=1, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='BACK', index=1, number=2, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='EVT', index=2, number=3, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='EXIF', index=3, number=4, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='FILE', index=4, number=5, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='LOG', index=5, number=6, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='LNK', index=6, number=7, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='LSO', index=7, number=8, + options=None, + type=None), + 
descriptor.EnumValueDescriptor( + name='META', index=8, number=9, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='PLIST', index=9, number=10, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='RAM', index=10, number=11, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='RECBIN', index=11, number=12, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='REG', index=12, number=13, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='WEBHIST', index=13, number=14, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='TORRENT', index=14, number=15, + options=None, + type=None), + descriptor.EnumValueDescriptor( + name='JOB', index=15, number=16, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=1487, + serialized_end=1660, +) + + +_ATTRIBUTE = descriptor.Descriptor( + name='Attribute', + full_name='plaso_storage.Attribute', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='key', full_name='plaso_storage.Attribute.key', index=0, + number=1, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='string', full_name='plaso_storage.Attribute.string', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='integer', full_name='plaso_storage.Attribute.integer', index=2, + number=3, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='array', full_name='plaso_storage.Attribute.array', index=3, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='dict', full_name='plaso_storage.Attribute.dict', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='boolean', full_name='plaso_storage.Attribute.boolean', index=5, + number=6, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='data', full_name='plaso_storage.Attribute.data', index=6, + number=7, type=12, cpp_type=9, label=1, + has_default_value=False, default_value="", + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='float', full_name='plaso_storage.Attribute.float', index=7, + number=8, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + 
descriptor.FieldDescriptor( + name='none', full_name='plaso_storage.Attribute.none', index=8, + number=9, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=51, + serialized_end=240, +) + + +_DICT = descriptor.Descriptor( + name='Dict', + full_name='plaso_storage.Dict', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='attributes', full_name='plaso_storage.Dict.attributes', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=242, + serialized_end=294, +) + + +_VALUE = descriptor.Descriptor( + name='Value', + full_name='plaso_storage.Value', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='integer', full_name='plaso_storage.Value.integer', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='string', full_name='plaso_storage.Value.string', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='data', full_name='plaso_storage.Value.data', index=2, + number=3, type=12, cpp_type=9, label=1, + has_default_value=False, default_value="", + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='array', full_name='plaso_storage.Value.array', index=3, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='dict', full_name='plaso_storage.Value.dict', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='boolean', full_name='plaso_storage.Value.boolean', index=5, + number=6, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='float', full_name='plaso_storage.Value.float', index=6, + number=7, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='none', full_name='plaso_storage.Value.none', index=7, + number=8, type=8, cpp_type=7, label=1, + 
has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=297, + serialized_end=469, +) + + +_ARRAY = descriptor.Descriptor( + name='Array', + full_name='plaso_storage.Array', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='values', full_name='plaso_storage.Array.values', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=471, + serialized_end=516, +) + + +_EVENTOBJECT = descriptor.Descriptor( + name='EventObject', + full_name='plaso_storage.EventObject', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='timestamp', full_name='plaso_storage.EventObject.timestamp', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='timestamp_desc', full_name='plaso_storage.EventObject.timestamp_desc', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='data_type', full_name='plaso_storage.EventObject.data_type', index=2, + number=3, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='attributes', full_name='plaso_storage.EventObject.attributes', index=3, + number=4, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='timezone', full_name='plaso_storage.EventObject.timezone', index=4, + number=5, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='filename', full_name='plaso_storage.EventObject.filename', index=5, + number=6, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='display_name', full_name='plaso_storage.EventObject.display_name', index=6, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='pathspec', 
full_name='plaso_storage.EventObject.pathspec', index=7, + number=8, type=12, cpp_type=9, label=1, + has_default_value=False, default_value="", + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='offset', full_name='plaso_storage.EventObject.offset', index=8, + number=9, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='store_number', full_name='plaso_storage.EventObject.store_number', index=9, + number=10, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='store_index', full_name='plaso_storage.EventObject.store_index', index=10, + number=11, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='tag', full_name='plaso_storage.EventObject.tag', index=11, + number=12, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='source_short', full_name='plaso_storage.EventObject.source_short', index=12, + number=13, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=1, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='source_long', full_name='plaso_storage.EventObject.source_long', index=13, + number=14, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='parser', full_name='plaso_storage.EventObject.parser', index=14, + number=15, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='inode', full_name='plaso_storage.EventObject.inode', index=15, + number=16, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='hostname', full_name='plaso_storage.EventObject.hostname', index=16, + number=17, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='plugin', full_name='plaso_storage.EventObject.plugin', index=17, + number=18, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='registry_type', 
full_name='plaso_storage.EventObject.registry_type', index=18, + number=19, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='allocated', full_name='plaso_storage.EventObject.allocated', index=19, + number=20, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='fs_type', full_name='plaso_storage.EventObject.fs_type', index=20, + number=21, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='recovered', full_name='plaso_storage.EventObject.recovered', index=21, + number=22, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='record_number', full_name='plaso_storage.EventObject.record_number', index=22, + number=23, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='source_name', full_name='plaso_storage.EventObject.source_name', index=23, + number=24, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='computer_name', full_name='plaso_storage.EventObject.computer_name', index=24, + number=25, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='event_identifier', full_name='plaso_storage.EventObject.event_identifier', index=25, + number=26, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='event_level', full_name='plaso_storage.EventObject.event_level', index=26, + number=27, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='xml_string', full_name='plaso_storage.EventObject.xml_string', index=27, + number=28, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='strings', full_name='plaso_storage.EventObject.strings', index=28, + number=29, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + 
options=None), + descriptor.FieldDescriptor( + name='username', full_name='plaso_storage.EventObject.username', index=29, + number=30, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='user_sid', full_name='plaso_storage.EventObject.user_sid', index=30, + number=31, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='cached_file_size', full_name='plaso_storage.EventObject.cached_file_size', index=31, + number=32, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='number_of_hits', full_name='plaso_storage.EventObject.number_of_hits', index=32, + number=33, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='cache_directory_index', full_name='plaso_storage.EventObject.cache_directory_index', index=33, + number=34, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='title', full_name='plaso_storage.EventObject.title', index=34, + number=35, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='metadata', full_name='plaso_storage.EventObject.metadata', index=35, + number=36, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='url', full_name='plaso_storage.EventObject.url', index=36, + number=37, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='keyname', full_name='plaso_storage.EventObject.keyname', index=37, + number=38, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='regvalue', full_name='plaso_storage.EventObject.regvalue', index=38, + number=39, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='text', full_name='plaso_storage.EventObject.text', index=39, + number=40, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + 
is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='uuid', full_name='plaso_storage.EventObject.uuid', index=40, + number=41, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _EVENTOBJECT_SOURCESHORT, + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=519, + serialized_end=1660, +) + + +_EVENTTAGGING_TAG = descriptor.Descriptor( + name='Tag', + full_name='plaso_storage.EventTagging.Tag', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='value', full_name='plaso_storage.EventTagging.Tag.value', index=0, + number=1, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=1821, + serialized_end=1841, +) + +_EVENTTAGGING = descriptor.Descriptor( + name='EventTagging', + full_name='plaso_storage.EventTagging', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='store_number', full_name='plaso_storage.EventTagging.store_number', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='store_index', full_name='plaso_storage.EventTagging.store_index', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='comment', full_name='plaso_storage.EventTagging.comment', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='color', full_name='plaso_storage.EventTagging.color', index=3, + number=4, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='tags', full_name='plaso_storage.EventTagging.tags', index=4, + number=5, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='event_uuid', full_name='plaso_storage.EventTagging.event_uuid', index=5, + number=6, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_EVENTTAGGING_TAG, ], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=1663, 
+ serialized_end=1841, +) + + +_EVENTGROUP_EVENTDESCRIPTION = descriptor.Descriptor( + name='EventDescription', + full_name='plaso_storage.EventGroup.EventDescription', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='store_number', full_name='plaso_storage.EventGroup.EventDescription.store_number', index=0, + number=1, type=3, cpp_type=2, label=2, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='store_index', full_name='plaso_storage.EventGroup.EventDescription.store_index', index=1, + number=2, type=3, cpp_type=2, label=2, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=2035, + serialized_end=2096, +) + +_EVENTGROUP = descriptor.Descriptor( + name='EventGroup', + full_name='plaso_storage.EventGroup', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='name', full_name='plaso_storage.EventGroup.name', index=0, + number=1, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='description', full_name='plaso_storage.EventGroup.description', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='first_timestamp', full_name='plaso_storage.EventGroup.first_timestamp', index=2, + number=3, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='last_timestamp', full_name='plaso_storage.EventGroup.last_timestamp', index=3, + number=4, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='color', full_name='plaso_storage.EventGroup.color', index=4, + number=5, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='category', full_name='plaso_storage.EventGroup.category', index=5, + number=6, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='events', full_name='plaso_storage.EventGroup.events', index=6, + number=7, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + 
nested_types=[_EVENTGROUP_EVENTDESCRIPTION, ], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=1844, + serialized_end=2096, +) + + +_PREPROCESS = descriptor.Descriptor( + name='PreProcess', + full_name='plaso_storage.PreProcess', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='collection_information', full_name='plaso_storage.PreProcess.collection_information', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='counter', full_name='plaso_storage.PreProcess.counter', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='store_range', full_name='plaso_storage.PreProcess.store_range', index=2, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='attributes', full_name='plaso_storage.PreProcess.attributes', index=3, + number=4, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='plugin_counter', full_name='plaso_storage.PreProcess.plugin_counter', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=2099, + serialized_end=2336, +) + + +_ANALYSISREPORT = descriptor.Descriptor( + name='AnalysisReport', + full_name='plaso_storage.AnalysisReport', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='plugin_name', full_name='plaso_storage.AnalysisReport.plugin_name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='time_compiled', full_name='plaso_storage.AnalysisReport.time_compiled', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='text', full_name='plaso_storage.AnalysisReport.text', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='images', full_name='plaso_storage.AnalysisReport.images', index=3, + number=4, type=12, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, 
containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='report_dict', full_name='plaso_storage.AnalysisReport.report_dict', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='report_array', full_name='plaso_storage.AnalysisReport.report_array', index=5, + number=6, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='filter_string', full_name='plaso_storage.AnalysisReport.filter_string', index=6, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=2339, + serialized_end=2538, +) + +_ATTRIBUTE.fields_by_name['array'].message_type = _ARRAY +_ATTRIBUTE.fields_by_name['dict'].message_type = _DICT +_DICT.fields_by_name['attributes'].message_type = _ATTRIBUTE +_VALUE.fields_by_name['array'].message_type = _ARRAY +_VALUE.fields_by_name['dict'].message_type = _DICT +_ARRAY.fields_by_name['values'].message_type = _VALUE +_EVENTOBJECT.fields_by_name['attributes'].message_type = _ATTRIBUTE +_EVENTOBJECT.fields_by_name['tag'].message_type = _EVENTTAGGING +_EVENTOBJECT.fields_by_name['source_short'].enum_type = _EVENTOBJECT_SOURCESHORT +_EVENTOBJECT.fields_by_name['strings'].message_type = _ARRAY +_EVENTOBJECT.fields_by_name['metadata'].message_type = _DICT +_EVENTOBJECT.fields_by_name['regvalue'].message_type = _DICT +_EVENTOBJECT_SOURCESHORT.containing_type = _EVENTOBJECT; +_EVENTTAGGING_TAG.containing_type = _EVENTTAGGING; +_EVENTTAGGING.fields_by_name['tags'].message_type = _EVENTTAGGING_TAG +_EVENTGROUP_EVENTDESCRIPTION.containing_type = _EVENTGROUP; +_EVENTGROUP.fields_by_name['events'].message_type = _EVENTGROUP_EVENTDESCRIPTION +_PREPROCESS.fields_by_name['collection_information'].message_type = _DICT +_PREPROCESS.fields_by_name['counter'].message_type = _DICT +_PREPROCESS.fields_by_name['store_range'].message_type = _ARRAY +_PREPROCESS.fields_by_name['attributes'].message_type = _ATTRIBUTE +_PREPROCESS.fields_by_name['plugin_counter'].message_type = _DICT +_ANALYSISREPORT.fields_by_name['report_dict'].message_type = _DICT +_ANALYSISREPORT.fields_by_name['report_array'].message_type = _ARRAY +DESCRIPTOR.message_types_by_name['Attribute'] = _ATTRIBUTE +DESCRIPTOR.message_types_by_name['Dict'] = _DICT +DESCRIPTOR.message_types_by_name['Value'] = _VALUE +DESCRIPTOR.message_types_by_name['Array'] = _ARRAY +DESCRIPTOR.message_types_by_name['EventObject'] = _EVENTOBJECT +DESCRIPTOR.message_types_by_name['EventTagging'] = _EVENTTAGGING +DESCRIPTOR.message_types_by_name['EventGroup'] = _EVENTGROUP +DESCRIPTOR.message_types_by_name['PreProcess'] = _PREPROCESS +DESCRIPTOR.message_types_by_name['AnalysisReport'] = _ANALYSISREPORT + +class Attribute(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _ATTRIBUTE + + # @@protoc_insertion_point(class_scope:plaso_storage.Attribute) + +class Dict(message.Message): + 
__metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _DICT + + # @@protoc_insertion_point(class_scope:plaso_storage.Dict) + +class Value(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _VALUE + + # @@protoc_insertion_point(class_scope:plaso_storage.Value) + +class Array(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _ARRAY + + # @@protoc_insertion_point(class_scope:plaso_storage.Array) + +class EventObject(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _EVENTOBJECT + + # @@protoc_insertion_point(class_scope:plaso_storage.EventObject) + +class EventTagging(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + + class Tag(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _EVENTTAGGING_TAG + + # @@protoc_insertion_point(class_scope:plaso_storage.EventTagging.Tag) + DESCRIPTOR = _EVENTTAGGING + + # @@protoc_insertion_point(class_scope:plaso_storage.EventTagging) + +class EventGroup(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + + class EventDescription(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _EVENTGROUP_EVENTDESCRIPTION + + # @@protoc_insertion_point(class_scope:plaso_storage.EventGroup.EventDescription) + DESCRIPTOR = _EVENTGROUP + + # @@protoc_insertion_point(class_scope:plaso_storage.EventGroup) + +class PreProcess(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _PREPROCESS + + # @@protoc_insertion_point(class_scope:plaso_storage.PreProcess) + +class AnalysisReport(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _ANALYSISREPORT + + # @@protoc_insertion_point(class_scope:plaso_storage.AnalysisReport) + +# @@protoc_insertion_point(module_scope) diff --git a/plaso/serializer/__init__.py b/plaso/serializer/__init__.py new file mode 100644 index 0000000..f462564 --- /dev/null +++ b/plaso/serializer/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/serializer/interface.py b/plaso/serializer/interface.py new file mode 100644 index 0000000..2b94d0a --- /dev/null +++ b/plaso/serializer/interface.py @@ -0,0 +1,181 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The serializer object interfaces."""
+
+# Since abc does not seem to have an @abc.abstractclassmethod we're using
+# @abc.abstractmethod instead and shutting up pylint about:
+# E0213: Method should have "self" as first argument.
+# pylint: disable=no-self-argument
+
+import abc
+
+
+class AnalysisReportSerializer(object):
+  """Class that implements the analysis report serializer interface."""
+
+  @abc.abstractmethod
+  def ReadSerialized(cls, serialized):
+    """Reads an analysis report from serialized form.
+
+    Args:
+      serialized: an object containing the serialized form.
+
+    Returns:
+      An analysis report (instance of AnalysisReport).
+    """
+
+  @abc.abstractmethod
+  def WriteSerialized(cls, analysis_report):
+    """Writes an analysis report to serialized form.
+
+    Args:
+      analysis_report: an analysis report (instance of AnalysisReport).
+
+    Returns:
+      An object containing the serialized form.
+    """
+
+
+class EventGroupSerializer(object):
+  """Class that implements the event group serializer interface."""
+
+  @abc.abstractmethod
+  def ReadSerialized(cls, serialized):
+    """Reads an event group from serialized form.
+
+    Args:
+      serialized: an object containing the serialized form.
+
+    Returns:
+      An event group (instance of EventGroup).
+    """
+
+  @abc.abstractmethod
+  def WriteSerialized(cls, event_group):
+    """Writes an event group to serialized form.
+
+    Args:
+      event_group: an event group (instance of EventGroup).
+
+    Returns:
+      An object containing the serialized form.
+    """
+
+
+class EventObjectSerializer(object):
+  """Class that implements the event object serializer interface."""
+
+  @abc.abstractmethod
+  def ReadSerialized(cls, serialized):
+    """Reads an event object from serialized form.
+
+    Args:
+      serialized: an object containing the serialized form.
+
+    Returns:
+      An event object (instance of EventObject).
+    """
+
+  @abc.abstractmethod
+  def WriteSerialized(cls, event_object):
+    """Writes an event object to serialized form.
+
+    Args:
+      event_object: an event object (instance of EventObject).
+
+    Returns:
+      An object containing the serialized form.
+    """
+
+
+class EventTagSerializer(object):
+  """Class that implements the event tag serializer interface."""
+
+  @abc.abstractmethod
+  def ReadSerialized(cls, serialized):
+    """Reads an event tag from serialized form.
+
+    Args:
+      serialized: an object containing the serialized form.
+
+    Returns:
+      An event tag (instance of EventTag).
+    """
+
+  @abc.abstractmethod
+  def WriteSerialized(cls, event_tag):
+    """Writes an event tag to serialized form.
+
+    Args:
+      event_tag: an event tag (instance of EventTag).
+
+    Returns:
+      An object containing the serialized form.
+    """
+
+
+class PathFilterSerializer(object):
+  """Class that implements the path filter serializer interface."""
+
+  @abc.abstractmethod
+  def ReadSerialized(cls, serialized):
+    """Reads a path filter from serialized form.
+
+    Args:
+      serialized: an object containing the serialized form.
+
+    Returns:
+      A path filter (instance of PathFilter).
+    """
+
+  @abc.abstractmethod
+  def WriteSerialized(cls, path_filter):
+    """Writes a path filter to serialized form.
+
+    Args:
+      path_filter: a path filter (instance of PathFilter).
+
+    Returns:
+      An object containing the serialized form.
+    """
+
+
+class PreprocessObjectSerializer(object):
+  """Class that implements the preprocessing object serializer interface."""
+
+  @abc.abstractmethod
+  def ReadSerialized(cls, serialized):
+    """Reads a preprocessing object from serialized form.
+
+    Args:
+      serialized: an object containing the serialized form.
+
+    Returns:
+      A preprocessing object (instance of PreprocessObject).
+    """
+
+  @abc.abstractmethod
+  def WriteSerialized(cls, pre_obj):
+    """Writes a preprocessing object to serialized form.
+
+    Args:
+      pre_obj: a preprocessing object (instance of PreprocessObject).
+
+    Returns:
+      An object containing the serialized form.
+    """
diff --git a/plaso/serializer/json_serializer.py b/plaso/serializer/json_serializer.py
new file mode 100644
index 0000000..ee1cf31
--- /dev/null
+++ b/plaso/serializer/json_serializer.py
@@ -0,0 +1,232 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The json serializer object implementation."""
+
+import logging
+import json
+
+from dfvfs.serializer import json_serializer as dfvfs_json_serializer
+
+from plaso.lib import event
+from plaso.serializer import interface
+
+
+class _EventTypeJsonEncoder(json.JSONEncoder):
+  """A class that implements an event type object JSON encoder."""
+
+  # pylint: disable=method-hidden
+  def default(self, object_instance):
+    """Returns a serialized version of an event type object.
+
+    Args:
+      object_instance: instance of an event type object.
+    """
+    # TODO: add support for the rest of the event type objects.
+    if isinstance(object_instance, event.EventTag):
+      return JsonEventTagSerializer.WriteSerialized(object_instance)
+
+    else:
+      return super(_EventTypeJsonEncoder, self).default(object_instance)
+
+
+class JsonAnalysisReportSerializer(interface.AnalysisReportSerializer):
+  """Class that implements the json analysis report serializer."""
+
+  @classmethod
+  def ReadSerialized(cls, json_string):
+    """Reads an analysis report from serialized form.
+
+    Args:
+      json_string: a JSON string containing the serialized form.
+
+    Returns:
+      An analysis report (instance of AnalysisReport).
+    """
+    # TODO: implement.
+    pass
+
+  @classmethod
+  def WriteSerialized(cls, analysis_report):
+    """Writes an analysis report to serialized form.
+
+    Args:
+      analysis_report: an analysis report (instance of AnalysisReport).
+
+    Returns:
+      A JSON string containing the serialized form.
+    """
+    # TODO: implement.
+    pass
+
+
+class JsonEventObjectSerializer(interface.EventObjectSerializer):
+  """Class that implements the json event object serializer."""
+
+  @classmethod
+  def ReadSerialized(cls, json_string):
+    """Reads an event object from serialized form.
+
+    Args:
+      json_string: a JSON string containing the serialized form.
+
+    Returns:
+      An event object (instance of EventObject).
+    """
+    event_object = event.EventObject()
+    json_attributes = json.loads(json_string)
+
+    for key, value in json_attributes.iteritems():
+      if key == 'tag':
+        value = JsonEventTagSerializer.ReadSerialized(value)
+      elif key == 'pathspec':
+        value = dfvfs_json_serializer.JsonPathSpecSerializer.ReadSerialized(
+            value)
+
+      setattr(event_object, key, value)
+
+    return event_object
+
+  @classmethod
+  def WriteSerialized(cls, event_object):
+    """Writes an event object to serialized form.
+
+    Args:
+      event_object: an event object (instance of EventObject).
+
+    Returns:
+      A JSON string containing the serialized form or None if the event
+      cannot be serialized.
+    """
+    event_attributes = event_object.GetValues()
+
+    serializer = dfvfs_json_serializer.JsonPathSpecSerializer
+    if 'pathspec' in event_attributes:
+      event_attributes['pathspec'] = serializer.WriteSerialized(
+          event_attributes['pathspec'])
+
+    try:
+      return json.dumps(event_attributes, cls=_EventTypeJsonEncoder)
+    except UnicodeDecodeError as exception:
+      # TODO: Add better error handling so this can be traced to a parser or
+      # a plugin and to which file that caused it.
+      logging.error(u'Unable to serialize event with error: {0:s}'.format(
+          exception))
+
+
+class JsonEventTagSerializer(interface.EventTagSerializer):
+  """Class that implements the json event tag serializer."""
+
+  @classmethod
+  def ReadSerialized(cls, json_string):
+    """Reads an event tag from serialized form.
+
+    Args:
+      json_string: a JSON string containing the serialized form.
+
+    Returns:
+      An event tag (instance of EventTag).
+    """
+    if not json_string:
+      return
+
+    event_tag = event.EventTag()
+
+    json_attributes = json.loads(json_string)
+
+    for key, value in json_attributes.iteritems():
+      setattr(event_tag, key, value)
+
+    return event_tag
+
+  @classmethod
+  def WriteSerialized(cls, event_tag):
+    """Writes an event tag to serialized form.
+
+    Args:
+      event_tag: an event tag (instance of EventTag).
+
+    Returns:
+      A JSON string containing the serialized form.
+
+    Raises:
+      RuntimeError: when the event tag is not valid for serialization.
+    """
+    if not event_tag.IsValidForSerialization():
+      raise RuntimeError(u'Invalid tag object not valid for serialization.')
+
+    return json.dumps(event_tag.__dict__)
+
+
+class JsonPathFilterSerializer(interface.PathFilterSerializer):
+  """Class that implements the json path filter serializer."""
+
+  @classmethod
+  def ReadSerialized(cls, serialized):
+    """Reads a path filter from serialized form.
+
+    Args:
+      serialized: a JSON string containing the serialized form.
+
+    Returns:
+      A path filter (instance of PathFilter).
+    """
+    # TODO: implement.
+    pass
+
+  @classmethod
+  def WriteSerialized(cls, path_filter):
+    """Writes a path filter to serialized form.
+
+    Args:
+      path_filter: a path filter (instance of PathFilter).
+
+    Returns:
+      A JSON string containing the serialized form.
+    """
+    # TODO: implement.
+    pass
+
+
+class JsonPreprocessObjectSerializer(interface.PreprocessObjectSerializer):
+  """Class that implements the json preprocessing object serializer."""
+
+  @classmethod
+  def ReadSerialized(cls, json_string):
+    """Reads a preprocessing object from serialized form.
+
+    Args:
+      json_string: a JSON string containing the serialized form.
+
+    Returns:
+      A preprocessing object (instance of PreprocessObject).
+    """
+    # TODO: implement.
+    pass
+
+  @classmethod
+  def WriteSerialized(cls, pre_obj):
+    """Writes a preprocessing object to serialized form.
+
+    Args:
+      pre_obj: a preprocessing object (instance of PreprocessObject).
+ + Returns: + A JSON string containing the serialized form. + """ + # TODO: implement. + pass diff --git a/plaso/serializer/json_serializer_test.py b/plaso/serializer/json_serializer_test.py new file mode 100644 index 0000000..f0a9b83 --- /dev/null +++ b/plaso/serializer/json_serializer_test.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the serializer object implementation using json.""" + +import re +import unittest + +from plaso.lib import event +from plaso.serializer import json_serializer + +# TODO: add tests for the non implemented serializer objects when implemented. + + +class JsonEventObjectSerializerTest(unittest.TestCase): + """Tests for the json event object serializer object.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + self._json_string = """{ + "zero_integer": 0, + "my_dict": { + "a": "not b", + "c": 34, + "list": ["sf", 234], "an": [234, 32]}, + "uuid": "5a78777006de4ddb8d7bbe12ab92ccf8", + "timestamp_desc": "Written", + "a_tuple": [ + "some item", + [234, 52, 15], + {"a": "not a", "b": "not b"}, + 35], + "timestamp": 1234124, + "my_list": ["asf", 4234, 2, 54, "asf"], + "empty_string": "", + "data_type": "test:event2", + "null_value": null, + "unicode_string": "And I'm a unicorn.", + "integer": 34, + "string": "Normal string"}""" + + # Collapse multiple spaces and new lines into a single space. + expression = re.compile(r'[ \n]+') + self._json_string = expression.sub(' ', self._json_string) + # Remove spaces after { and [ characters. + expression = re.compile(r'([{[])[ ]+') + self._json_string = expression.sub('\\1', self._json_string) + # Remove spaces before } and ] characters. + expression = re.compile(r'[ ]+([}\]])') + self._json_string = expression.sub('\\1', self._json_string) + + def testReadSerialized(self): + """Test the read serialized functionality.""" + serializer = json_serializer.JsonEventObjectSerializer + event_object = serializer.ReadSerialized(self._json_string) + + # An integer value containing 0 should get stored. 
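+    # (i.e. a falsy but meaningful value such as 0 must survive the
+    # round trip and not be dropped by the serializer.)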
+    self.assertTrue(hasattr(event_object, 'zero_integer'))
+
+    attribute_value = getattr(event_object, 'integer', 0)
+    self.assertEquals(attribute_value, 34)
+
+    attribute_value = getattr(event_object, 'my_list', [])
+    self.assertEquals(len(attribute_value), 5)
+
+    attribute_value = getattr(event_object, 'string', '')
+    self.assertEquals(attribute_value, 'Normal string')
+
+    attribute_value = getattr(event_object, 'unicode_string', u'')
+    self.assertEquals(attribute_value, u'And I\'m a unicorn.')
+
+    attribute_value = getattr(event_object, 'a_tuple', ())
+    self.assertEquals(len(attribute_value), 4)
+
+  def testWriteSerialized(self):
+    """Test the write serialized functionality."""
+    event_object = event.EventObject()
+
+    event_object.data_type = 'test:event2'
+    event_object.timestamp = 1234124
+    event_object.timestamp_desc = 'Written'
+    # Prevent the event object from generating its own UUID.
+    event_object.uuid = '5a78777006de4ddb8d7bbe12ab92ccf8'
+
+    event_object.empty_string = u''
+    event_object.zero_integer = 0
+    event_object.integer = 34
+    event_object.string = 'Normal string'
+    event_object.unicode_string = u'And I\'m a unicorn.'
+    event_object.my_list = ['asf', 4234, 2, 54, 'asf']
+    event_object.my_dict = {
+        'a': 'not b', 'c': 34, 'list': ['sf', 234], 'an': [234, 32]}
+    event_object.a_tuple = (
+        'some item', [234, 52, 15], {'a': 'not a', 'b': 'not b'}, 35)
+    event_object.null_value = None
+
+    serializer = json_serializer.JsonEventObjectSerializer
+    json_string = serializer.WriteSerialized(event_object)
+    self.assertEquals(sorted(json_string), sorted(self._json_string))
+
+    event_object = serializer.ReadSerialized(json_string)
+
+    # TODO: fix this.
+    # An empty string should not get stored.
+    # self.assertFalse(hasattr(event_object, 'empty_string'))
+
+    # A None (or Null) value should not get stored.
+    # self.assertFalse(hasattr(event_object, 'null_value'))
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/serializer/protobuf_serializer.py b/plaso/serializer/protobuf_serializer.py
new file mode 100644
index 0000000..495cd9a
--- /dev/null
+++ b/plaso/serializer/protobuf_serializer.py
@@ -0,0 +1,737 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The protobuf serializer object implementation."""
+
+import logging
+
+from dfvfs.serializer import protobuf_serializer as dfvfs_protobuf_serializer
+from google.protobuf import message
+
+from plaso.lib import event
+from plaso.lib import utils
+from plaso.proto import plaso_storage_pb2
+from plaso.serializer import interface
+
+
+class ProtobufEventAttributeSerializer(object):
+  """Class that implements the protobuf event attribute serializer."""
+
+  @classmethod
+  def ReadSerializedObject(cls, proto_attribute):
+    """Reads an event attribute from serialized form.
+
+    Args:
+      proto_attribute: a protobuf attribute object containing the serialized
+          form.
+ + Returns: + A tuple containing the attribute name and value. + + Raises: + RuntimeError: when the protobuf attribute (field) type is not supported. + """ + attribute_name = u'' + try: + if proto_attribute.HasField('key'): + attribute_name = proto_attribute.key + except ValueError: + pass + + if not isinstance(proto_attribute, ( + plaso_storage_pb2.Attribute, plaso_storage_pb2.Value)): + raise RuntimeError(u'Unsupported protobuf type.') + + if proto_attribute.HasField('string'): + return attribute_name, proto_attribute.string + + elif proto_attribute.HasField('integer'): + return attribute_name, proto_attribute.integer + + elif proto_attribute.HasField('boolean'): + return attribute_name, proto_attribute.boolean + + elif proto_attribute.HasField('dict'): + attribute_value = {} + + for proto_dict in proto_attribute.dict.attributes: + dict_key, dict_value = cls.ReadSerializedObject(proto_dict) + attribute_value[dict_key] = dict_value + return attribute_name, attribute_value + + elif proto_attribute.HasField('array'): + attribute_value = [] + + for proto_array in proto_attribute.array.values: + _, list_value = cls.ReadSerializedObject(proto_array) + attribute_value.append(list_value) + return attribute_name, attribute_value + + elif proto_attribute.HasField('data'): + return attribute_name, proto_attribute.data + + elif proto_attribute.HasField('float'): + return attribute_name, proto_attribute.float + + elif proto_attribute.HasField('none'): + return attribute_name, None + + else: + raise RuntimeError(u'Unsupported proto attribute type.') + + @classmethod + def ReadSerializedDictObject(cls, proto_dict): + """Reads a dictionary event attribute from serialized form. + + Args: + proto_dict: a protobuf Dict object containing the serialized form. + + Returns: + A dictionary object. + """ + dict_object = {} + for proto_attribute in proto_dict.attributes: + dict_key, dict_value = cls.ReadSerializedObject(proto_attribute) + dict_object[dict_key] = dict_value + + return dict_object + + @classmethod + def ReadSerializedListObject(cls, proto_list): + """Reads a list event attribute from serialized form. + + Args: + proto_list: a protobuf List object containing the serialized form. + + Returns: + A list object. + """ + list_object = [] + for proto_value in proto_list.values: + _, list_value = cls.ReadSerializedObject(proto_value) + list_object.append(list_value) + + return list_object + + @classmethod + def WriteSerializedObject( + cls, proto_attribute, attribute_name, attribute_value): + """Writes an event attribute to serialized form. + + The attribute of an event object can store almost any + arbitrary data, so the corresponding protobuf storage must deal with the + various data types. This method identifies the data type and assigns it + properly to the attribute protobuf. + + Args: + proto_attribute: a protobuf attribute object. + attribute_name: the name of the attribute. + attribute_value: the value of the attribute. + + Returns: + A protobuf object containing the serialized form. + """ + if attribute_name: + proto_attribute.key = attribute_name + + if isinstance(attribute_value, (str, unicode)): + proto_attribute.string = utils.GetUnicodeString(attribute_value) + + elif isinstance(attribute_value, bool): + proto_attribute.boolean = attribute_value + + elif isinstance(attribute_value, (int, long)): + # TODO: add some bounds checking. 
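+      # Assumption: the proto 'integer' field is a 64-bit integer, so
+      # assigning a value outside the signed 64-bit range would raise
+      # ValueError here.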
+      proto_attribute.integer = attribute_value
+
+    elif isinstance(attribute_value, dict):
+      cls.WriteSerializedDictObject(proto_attribute, 'dict', attribute_value)
+
+    elif isinstance(attribute_value, (list, tuple)):
+      cls.WriteSerializedListObject(proto_attribute, 'array', attribute_value)
+
+    elif isinstance(attribute_value, float):
+      proto_attribute.float = attribute_value
+
+    elif not attribute_value:
+      proto_attribute.none = True
+
+    else:
+      proto_attribute.data = attribute_value
+
+  @classmethod
+  def WriteSerializedDictObject(
+      cls, proto_attribute, attribute_name, dict_object):
+    """Writes a dictionary event attribute to serialized form.
+
+    Args:
+      proto_attribute: a protobuf attribute object.
+      attribute_name: the name of the attribute.
+      dict_object: a dictionary object that is the value of the attribute.
+    """
+    dict_proto = plaso_storage_pb2.Dict()
+
+    for dict_key, dict_value in dict_object.items():
+      dict_proto_add = dict_proto.attributes.add()
+      cls.WriteSerializedObject(dict_proto_add, dict_key, dict_value)
+
+    dict_attribute = getattr(proto_attribute, attribute_name)
+    dict_attribute.MergeFrom(dict_proto)
+
+  @classmethod
+  def WriteSerializedListObject(
+      cls, proto_attribute, attribute_name, list_object):
+    """Writes a list event attribute to serialized form.
+
+    Args:
+      proto_attribute: a protobuf attribute object.
+      attribute_name: the name of the attribute.
+      list_object: a list object that is the value of the attribute.
+    """
+    list_proto = plaso_storage_pb2.Array()
+
+    for list_value in list_object:
+      list_proto_add = list_proto.values.add()
+      cls.WriteSerializedObject(list_proto_add, '', list_value)
+
+    list_attribute = getattr(proto_attribute, attribute_name)
+    list_attribute.MergeFrom(list_proto)
+
+
+class ProtobufAnalysisReportSerializer(interface.AnalysisReportSerializer):
+  """Class that implements the protobuf analysis report serializer."""
+
+  @classmethod
+  def ReadSerializedObject(cls, proto):
+    """Reads an analysis report from serialized form.
+
+    Args:
+      proto: a protobuf object containing the serialized form (instance of
+          plaso_storage_pb2.AnalysisReport).
+
+    Returns:
+      An analysis report (instance of AnalysisReport).
+    """
+    analysis_report = event.AnalysisReport()
+
+    for proto_attribute, value in proto.ListFields():
+      # TODO: replace by ReadSerializedDictObject, need tests first.
+      # dict_object = ProtobufEventAttributeSerializer.ReadSerializedDictObject(
+      #     proto.report_dict)
+      if proto_attribute.name == 'report_dict':
+        new_value = {}
+        for proto_dict in proto.report_dict.attributes:
+          dict_key, dict_value = (
+              ProtobufEventAttributeSerializer.ReadSerializedObject(proto_dict))
+          new_value[dict_key] = dict_value
+        setattr(analysis_report, proto_attribute.name, new_value)
+
+      # TODO: replace by ReadSerializedListObject, need tests first.
+      # list_object = ProtobufEventAttributeSerializer.ReadSerializedListObject(
+      #     proto.report_array)
+      elif proto_attribute.name == 'report_array':
+        new_value = []
+
+        for proto_array in proto.report_array.values:
+          _, list_value = ProtobufEventAttributeSerializer.ReadSerializedObject(
+              proto_array)
+          new_value.append(list_value)
+        setattr(analysis_report, proto_attribute.name, new_value)
+
+      else:
+        setattr(analysis_report, proto_attribute.name, value)
+
+    return analysis_report
+
+  @classmethod
+  def ReadSerialized(cls, proto_string):
+    """Reads an analysis report from serialized form.
+
+    Args:
+      proto_string: a protobuf string containing the serialized form.
+ + Returns: + An analysis report (instance of AnalysisReport). + """ + proto = plaso_storage_pb2.AnalysisReport() + proto.ParseFromString(proto_string) + + return cls.ReadSerializedObject(proto) + + @classmethod + def WriteSerializedObject(cls, analysis_report): + """Writes an analysis report to serialized form. + + Args: + analysis_report: an analysis report (instance of AnalysisReport). + + Returns: + A protobuf object containing the serialized form (instance of + plaso_storage_pb2.AnalysisReport). + """ + proto = plaso_storage_pb2.AnalysisReport() + proto.time_compiled = getattr(analysis_report, 'time_compiled', 0) + plugin_name = getattr(analysis_report, 'plugin_name', None) + + if plugin_name: + proto.plugin_name = plugin_name + + proto.text = getattr(analysis_report, 'text', 'N/A') + + for image in getattr(analysis_report, 'images', []): + proto.images.append(image) + + if hasattr(analysis_report, 'report_dict'): + dict_proto = plaso_storage_pb2.Dict() + for key, value in getattr(analysis_report, 'report_dict', {}).iteritems(): + sub_proto = dict_proto.attributes.add() + ProtobufEventAttributeSerializer.WriteSerializedObject( + sub_proto, key, value) + proto.report_dict.MergeFrom(dict_proto) + + if hasattr(analysis_report, 'report_array'): + list_proto = plaso_storage_pb2.Array() + for value in getattr(analysis_report, 'report_array', []): + sub_proto = list_proto.values.add() + ProtobufEventAttributeSerializer.WriteSerializedObject( + sub_proto, '', value) + + proto.report_array.MergeFrom(list_proto) + + return proto + + @classmethod + def WriteSerialized(cls, analysis_report): + """Writes an analysis report to serialized form. + + Args: + analysis_report: an analysis report (instance of AnalysisReport). + + Returns: + A protobuf string containing the serialized form. + """ + proto = cls.WriteSerializedObject(analysis_report) + return proto.SerializeToString() + + +class ProtobufEventObjectSerializer(interface.EventObjectSerializer): + """Class that implements the protobuf event object serializer.""" + + # TODO: check if the next TODO still applies. + # TODO: remove this once source_short has been moved to event formatter. + # Lists of the mappings between the source short values of the event object + # and those used in the protobuf. + _SOURCE_SHORT_FROM_PROTO_MAP = {} + _SOURCE_SHORT_TO_PROTO_MAP = {} + for value in plaso_storage_pb2.EventObject.DESCRIPTOR.enum_types_by_name[ + 'SourceShort'].values: + _SOURCE_SHORT_FROM_PROTO_MAP[value.number] = value.name + _SOURCE_SHORT_TO_PROTO_MAP[value.name] = value.number + _SOURCE_SHORT_FROM_PROTO_MAP.setdefault(6) + _SOURCE_SHORT_TO_PROTO_MAP.setdefault('LOG') + + _path_spec_serializer = dfvfs_protobuf_serializer.ProtobufPathSpecSerializer + + @classmethod + def ReadSerializedObject(cls, proto): + """Reads an event object from serialized form. + + Args: + proto: a protobuf object containing the serialized form (instance of + plaso_storage_pb2.EventObject). + + Returns: + An event object (instance of EventObject). 
+    """
+    event_object = event.EventObject()
+    event_object.data_type = proto.data_type
+
+    for proto_attribute, value in proto.ListFields():
+      if proto_attribute.name == 'source_short':
+        event_object.source_short = cls._SOURCE_SHORT_FROM_PROTO_MAP[value]
+
+      elif proto_attribute.name == 'pathspec':
+        event_object.pathspec = (
+            cls._path_spec_serializer.ReadSerialized(proto.pathspec))
+
+      elif proto_attribute.name == 'tag':
+        event_object.tag = ProtobufEventTagSerializer.ReadSerializedObject(
+            proto.tag)
+
+      elif proto_attribute.name == 'attributes':
+        continue
+
+      else:
+        # Register the attribute correctly.
+        # The attribute can be a 'regular' high level attribute or
+        # a message (Dict/Array) that needs special handling.
+        if isinstance(value, message.Message):
+          if value.DESCRIPTOR.full_name.endswith('.Dict'):
+            value = ProtobufEventAttributeSerializer.ReadSerializedDictObject(
+                value)
+          elif value.DESCRIPTOR.full_name.endswith('.Array'):
+            value = ProtobufEventAttributeSerializer.ReadSerializedListObject(
+                value)
+          else:
+            value = ProtobufEventAttributeSerializer.ReadSerializedObject(value)
+
+        setattr(event_object, proto_attribute.name, value)
+
+    # The plaso_storage_pb2.EventObject protobuf contains a field named
+    # attributes which is technically not a Dict but behaves similarly.
+    dict_object = ProtobufEventAttributeSerializer.ReadSerializedDictObject(
+        proto)
+
+    for attribute, value in dict_object.iteritems():
+      setattr(event_object, attribute, value)
+
+    return event_object
+
+  @classmethod
+  def ReadSerialized(cls, proto_string):
+    """Reads an event object from serialized form.
+
+    Args:
+      proto_string: a protobuf string containing the serialized form.
+
+    Returns:
+      An event object (instance of EventObject).
+    """
+    proto = plaso_storage_pb2.EventObject()
+    proto.ParseFromString(proto_string)
+
+    return cls.ReadSerializedObject(proto)
+
+  @classmethod
+  def WriteSerializedObject(cls, event_object):
+    """Writes an event object to serialized form.
+
+    Args:
+      event_object: an event object (instance of EventObject).
+
+    Returns:
+      A protobuf object containing the serialized form (instance of
+          plaso_storage_pb2.EventObject).
+ """ + proto = plaso_storage_pb2.EventObject() + + proto.data_type = getattr(event_object, 'data_type', 'event') + + for attribute_name in event_object.GetAttributes(): + if attribute_name == 'source_short': + proto.source_short = cls._SOURCE_SHORT_TO_PROTO_MAP[ + event_object.source_short] + + elif attribute_name == 'pathspec': + attribute_value = getattr(event_object, attribute_name, None) + if attribute_value: + attribute_value = cls._path_spec_serializer.WriteSerialized( + attribute_value) + setattr(proto, attribute_name, attribute_value) + + elif attribute_name == 'tag': + attribute_value = getattr(event_object, attribute_name, None) + if attribute_value: + event_tag_proto = ProtobufEventTagSerializer.WriteSerializedObject( + attribute_value) + proto.tag.MergeFrom(event_tag_proto) + + elif hasattr(proto, attribute_name): + attribute_value = getattr(event_object, attribute_name) + + if attribute_value is None: + continue + + if isinstance(attribute_value, (str, unicode)): + attribute_value = utils.GetUnicodeString(attribute_value) + if not attribute_value: + continue + + if isinstance(attribute_value, dict): + ProtobufEventAttributeSerializer.WriteSerializedDictObject( + proto, attribute_name, attribute_value) + + elif isinstance(attribute_value, (list, tuple)): + ProtobufEventAttributeSerializer.WriteSerializedListObject( + proto, attribute_name, attribute_value) + + else: + try: + setattr(proto, attribute_name, attribute_value) + except ValueError as exception: + path_spec = getattr(event_object, 'pathspec', None) + path = getattr(path_spec, 'location', u'') + logging.error(( + u'Unable to save value for: {0:s} [{1:s}] with error: {2:s} ' + u'coming from file: {3:s}').format( + attribute_name, type(attribute_value), exception, path)) + # Catch potential out of range errors. + if isinstance(attribute_value, (int, long)): + setattr(proto, attribute_name, -1) + + else: + attribute_value = getattr(event_object, attribute_name) + + # TODO: check if the next TODO still applies. + # Serialize the attribute value only if it is an integer type + # (int or long) or if it has a value. + # TODO: fix logic. + if (isinstance(attribute_value, (bool, int, float, long)) or + attribute_value): + proto_attribute = proto.attributes.add() + ProtobufEventAttributeSerializer.WriteSerializedObject( + proto_attribute, attribute_name, attribute_value) + + return proto + + @classmethod + def WriteSerialized(cls, event_object): + """Writes an event object to serialized form. + + Args: + event_object: an event object (instance of EventObject). + + Returns: + A protobuf string containing the serialized form or None if + there is an error encoding the protobuf. + """ + proto = cls.WriteSerializedObject(event_object) + try: + return proto.SerializeToString() + except message.EncodeError: + # TODO: Add better error handling so this can be traced to a parser or + # a plugin and to which file that caused it. + logging.error(u'Unable to serialize event object.') + + +class ProtobufEventTagSerializer(interface.EventTagSerializer): + """Class that implements the protobuf event tag serializer.""" + + @classmethod + def ReadSerializedObject(cls, proto): + """Reads an event tag from serialized form. + + Args: + proto: a protobuf object containing the serialized form (instance of + plaso_storage_pb2.EventTag). + + Returns: + An event tag (instance of EventTag). 
+ """ + event_tag = event.EventTag() + + for proto_attribute, attribute_value in proto.ListFields(): + if proto_attribute.name == 'tags': + event_tag.tags = [] + for proto_tag in proto.tags: + event_tag.tags.append(proto_tag.value) + else: + setattr(event_tag, proto_attribute.name, attribute_value) + + return event_tag + + @classmethod + def ReadSerialized(cls, proto_string): + """Reads an event tag from serialized form. + + Args: + proto_string: a protobuf string containing the serialized form. + + Returns: + An event tag (instance of EventTag). + """ + proto = plaso_storage_pb2.EventTagging() + proto.ParseFromString(proto_string) + + return cls.ReadSerializedObject(proto) + + @classmethod + def WriteSerializedObject(cls, event_tag): + """Writes an event tag to serialized form. + + Args: + event_tag: an event tag (instance of EventTag). + + Returns: + A protobuf object containing the serialized form (instance of + plaso_storage_pb2.EventTagging). + """ + proto = plaso_storage_pb2.EventTagging() + + # TODO: Once we move EventTag to slots we need to query __slots__ + # instead of __dict__ + for attribute_name in event_tag.__dict__: + attribute_value = getattr(event_tag, attribute_name, None) + + if attribute_name == 'tags' and type(attribute_value) in (tuple, list): + for tag_string in attribute_value: + proto_tag_add = proto.tags.add() + proto_tag_add.value = tag_string + + elif attribute_value is not None: + setattr(proto, attribute_name, attribute_value) + + comment = getattr(event_tag, 'comment', '') + if comment: + proto.comment = comment + + color = getattr(event_tag, 'color', '') + if color: + proto.color = color + + return proto + + @classmethod + def WriteSerialized(cls, event_tag): + """Writes an event tag to serialized form. + + Args: + event_tag: an event tag (instance of EventTag). + + Returns: + A protobuf string containing the serialized form. + + Raises: + RuntimeError: when the event tag is not valid for serialization. + """ + if not event_tag.IsValidForSerialization(): + raise RuntimeError(u'Invalid tag object not valid for serialization.') + + proto = cls.WriteSerializedObject(event_tag) + return proto.SerializeToString() + + +class ProtobufPreprocessObjectSerializer(interface.PreprocessObjectSerializer): + """Class that implements the protobuf preprocessing object serializer.""" + + @classmethod + def ReadSerializedObject(cls, proto): + """Reads a preprocess object from serialized form. + + Args: + proto: a protobuf object containing the serialized form (instance of + plaso_storage_pb2.Preprocess). + + Returns: + A preprocessing object (instance of PreprocessObject). 
+ """ + pre_obj = event.PreprocessObject() + + for attribute in proto.attributes: + key, value = ProtobufEventAttributeSerializer.ReadSerializedObject( + attribute) + if key == 'zone': + pre_obj.SetTimezone(value) + else: + setattr(pre_obj, key, value) + + if proto.HasField('counter'): + dict_object = ProtobufEventAttributeSerializer.ReadSerializedDictObject( + proto.counter) + pre_obj.SetCounterValues(dict_object) + + if proto.HasField('plugin_counter'): + dict_object = ProtobufEventAttributeSerializer.ReadSerializedDictObject( + proto.plugin_counter) + pre_obj.SetPluginCounterValues(dict_object) + + if proto.HasField('store_range'): + range_list = [] + for value in proto.store_range.values: + if value.HasField('integer'): + range_list.append(value.integer) + pre_obj.store_range = (range_list[0], range_list[-1]) + + if proto.HasField('collection_information'): + dict_object = ProtobufEventAttributeSerializer.ReadSerializedDictObject( + proto.collection_information) + pre_obj.SetCollectionInformationValues(dict_object) + + return pre_obj + + @classmethod + def ReadSerialized(cls, proto_string): + """Reads a preprocess object from serialized form. + + Args: + proto_string: a protobuf string containing the serialized form. + + Returns: + A preprocessing object (instance of PreprocessObject). + """ + proto = plaso_storage_pb2.PreProcess() + proto.ParseFromString(proto_string) + + return cls.ReadSerializedObject(proto) + + @classmethod + def WriteSerializedObject(cls, pre_obj): + """Writes a preprocessing object to serialized form. + + Args: + pre_obj: a preprocessing object (instance of PreprocessObject). + + Returns: + A protobuf object containing the serialized form (instance of + plaso_storage_pb2.PreProcess). + """ + proto = plaso_storage_pb2.PreProcess() + + for attribute, value in pre_obj.__dict__.items(): + if attribute == 'collection_information': + zone = value.get('configured_zone', '') + if zone and hasattr(zone, 'zone'): + value['configured_zone'] = zone.zone + ProtobufEventAttributeSerializer.WriteSerializedDictObject( + proto, 'collection_information', value) + elif attribute == 'counter': + value_dict = dict(value.items()) + ProtobufEventAttributeSerializer.WriteSerializedDictObject( + proto, 'counter', value_dict) + elif attribute == 'plugin_counter': + value_dict = dict(value.items()) + ProtobufEventAttributeSerializer.WriteSerializedDictObject( + proto, 'plugin_counter', value_dict) + elif attribute == 'store_range': + range_proto = plaso_storage_pb2.Array() + range_start = range_proto.values.add() + range_start.integer = int(value[0]) + range_end = range_proto.values.add() + range_end.integer = int(value[-1]) + proto.store_range.MergeFrom(range_proto) + else: + if attribute == 'zone': + value = value.zone + if isinstance(value, (bool, int, float, long)) or value: + proto_attribute = proto.attributes.add() + ProtobufEventAttributeSerializer.WriteSerializedObject( + proto_attribute, attribute, value) + + return proto + + @classmethod + def WriteSerialized(cls, pre_obj): + """Writes a preprocessing object to serialized form. + + Args: + pre_obj: a preprocessing object (instance of PreprocessObject). + + Returns: + A protobuf string containing the serialized form. 
+ """ + proto = cls.WriteSerializedObject(pre_obj) + return proto.SerializeToString() diff --git a/plaso/serializer/protobuf_serializer_test.py b/plaso/serializer/protobuf_serializer_test.py new file mode 100644 index 0000000..d72ea9a --- /dev/null +++ b/plaso/serializer/protobuf_serializer_test.py @@ -0,0 +1,211 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the serializer object implementation using protobuf.""" + +import unittest + +from plaso.lib import event +from plaso.proto import plaso_storage_pb2 +from plaso.serializer import protobuf_serializer + + +class ProtobufAnalysisReportSerializerTest(unittest.TestCase): + """Tests for the protobuf analysis report serializer object.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + # TODO: add an analysis report test. + pass + + def testReadSerialized(self): + """Test the read serialized functionality.""" + # TODO: add an analysis report test. + pass + + def testWriteSerialized(self): + """Test the write serialized functionality.""" + # TODO: add an analysis report test. + pass + + +class ProtobufEventObjectSerializerTest(unittest.TestCase): + """Tests for the protobuf event object serializer object.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + proto = plaso_storage_pb2.EventObject() + + proto.data_type = 'test:event2' + proto.timestamp = 1234124 + proto.timestamp_desc = 'Written' + + serializer = protobuf_serializer.ProtobufEventAttributeSerializer + + proto_attribute = proto.attributes.add() + serializer.WriteSerializedObject(proto_attribute, 'zero_integer', 0) + + proto_attribute = proto.attributes.add() + dict_object = { + 'a': 'not b', 'c': 34, 'list': ['sf', 234], 'an': [234, 32]} + serializer.WriteSerializedObject(proto_attribute, 'my_dict', dict_object) + + proto_attribute = proto.attributes.add() + tuple_object = ( + 'some item', [234, 52, 15], {'a': 'not a', 'b': 'not b'}, 35) + serializer.WriteSerializedObject(proto_attribute, 'a_tuple', tuple_object) + + proto_attribute = proto.attributes.add() + list_object = ['asf', 4234, 2, 54, 'asf'] + serializer.WriteSerializedObject(proto_attribute, 'my_list', list_object) + + proto_attribute = proto.attributes.add() + serializer.WriteSerializedObject( + proto_attribute, 'unicode_string', u'And I\'m a unicorn.') + + proto_attribute = proto.attributes.add() + serializer.WriteSerializedObject(proto_attribute, 'integer', 34) + + proto_attribute = proto.attributes.add() + serializer.WriteSerializedObject(proto_attribute, 'string', 'Normal string') + + proto.uuid = '5a78777006de4ddb8d7bbe12ab92ccf8' + + self._proto_string = proto.SerializeToString() + + def testReadSerialized(self): + """Test the read serialized functionality.""" + serializer = protobuf_serializer.ProtobufEventObjectSerializer + event_object = 
serializer.ReadSerialized(self._proto_string)
+
+    # An integer value containing 0 should get stored.
+    self.assertTrue(hasattr(event_object, 'zero_integer'))
+
+    attribute_value = getattr(event_object, 'integer', 0)
+    self.assertEquals(attribute_value, 34)
+
+    attribute_value = getattr(event_object, 'my_list', [])
+    self.assertEquals(len(attribute_value), 5)
+
+    attribute_value = getattr(event_object, 'string', '')
+    self.assertEquals(attribute_value, 'Normal string')
+
+    attribute_value = getattr(event_object, 'unicode_string', u'')
+    self.assertEquals(attribute_value, u'And I\'m a unicorn.')
+
+    attribute_value = getattr(event_object, 'a_tuple', ())
+    self.assertEquals(len(attribute_value), 4)
+
+  def testWriteSerialized(self):
+    """Test the write serialized functionality."""
+    event_object = event.EventObject()
+
+    event_object.data_type = 'test:event2'
+    event_object.timestamp = 1234124
+    event_object.timestamp_desc = 'Written'
+    # Prevent the event object from generating its own UUID.
+    event_object.uuid = '5a78777006de4ddb8d7bbe12ab92ccf8'
+
+    event_object.empty_string = u''
+    event_object.zero_integer = 0
+    event_object.integer = 34
+    event_object.string = 'Normal string'
+    event_object.unicode_string = u'And I\'m a unicorn.'
+    event_object.my_list = ['asf', 4234, 2, 54, 'asf']
+    event_object.my_dict = {
+        'a': 'not b', 'c': 34, 'list': ['sf', 234], 'an': [234, 32]}
+    event_object.a_tuple = (
+        'some item', [234, 52, 15], {'a': 'not a', 'b': 'not b'}, 35)
+    event_object.null_value = None
+
+    serializer = protobuf_serializer.ProtobufEventObjectSerializer
+    proto_string = serializer.WriteSerialized(event_object)
+    self.assertEquals(proto_string, self._proto_string)
+
+    event_object = serializer.ReadSerialized(proto_string)
+
+    # An empty string should not get stored.
+    self.assertFalse(hasattr(event_object, 'empty_string'))
+
+    # A None (or Null) value should not get stored.
+    self.assertFalse(hasattr(event_object, 'null_value'))
+
+
+class ProtobufEventTagSerializerTest(unittest.TestCase):
+  """Tests for the protobuf event tag serializer object."""
+
+  def setUp(self):
+    """Sets up the needed objects used throughout the test."""
+    proto = plaso_storage_pb2.EventTagging()
+    proto.store_number = 234
+    proto.store_index = 18
+    proto.comment = u'My first comment.'
+    proto.color = u'Red'
+    proto_tag = proto.tags.add()
+    proto_tag.value = u'Malware'
+    proto_tag = proto.tags.add()
+    proto_tag.value = u'Common'
+
+    self._proto_string = proto.SerializeToString()
+
+  def testReadSerialized(self):
+    """Test the read serialized functionality."""
+    serializer = protobuf_serializer.ProtobufEventTagSerializer
+    event_tag = serializer.ReadSerialized(self._proto_string)
+
+    self.assertEquals(event_tag.color, u'Red')
+    self.assertEquals(event_tag.comment, u'My first comment.')
+    self.assertEquals(event_tag.store_index, 18)
+    self.assertEquals(len(event_tag.tags), 2)
+    self.assertEquals(event_tag.tags, [u'Malware', u'Common'])
+
+  def testWriteSerialized(self):
+    """Test the write serialized functionality."""
+    event_tag = event.EventTag()
+
+    event_tag.store_number = 234
+    event_tag.store_index = 18
+    event_tag.comment = u'My first comment.'
+ event_tag.color = u'Red' + event_tag.tags = [u'Malware', u'Common'] + + serializer = protobuf_serializer.ProtobufEventTagSerializer + proto_string = serializer.WriteSerialized(event_tag) + self.assertEquals(proto_string, self._proto_string) + + +class ProtobufPreprocessObjectSerializerTest(unittest.TestCase): + """Tests for the protobuf preprocess object serializer object.""" + + def setUp(self): + """Sets up the needed objects used throughout the test.""" + # TODO: add a preprocess object test. + pass + + def testReadSerialized(self): + """Test the read serialized functionality.""" + # TODO: add a preprocess object test. + pass + + def testWriteSerialized(self): + """Test the write serialized functionality.""" + # TODO: add a preprocess object test. + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/unix/__init__.py b/plaso/unix/__init__.py new file mode 100644 index 0000000..ae78399 --- /dev/null +++ b/plaso/unix/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plaso/unix/bsmtoken.py b/plaso/unix/bsmtoken.py new file mode 100644 index 0000000..15de3b6 --- /dev/null +++ b/plaso/unix/bsmtoken.py @@ -0,0 +1,810 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Basic Security Module definitions.""" + +# Arbitrary tokens. +# Type of data to print in a BSM_TOKEN_DATA. +BSM_TOKEN_DATA_TYPE = { + 0: u'AUR_CHAR', + 1: u'AUR_SHORT', + 2: u'AUR_INT32'} + +BSM_TOKEN_DATA_PRINT = { + 0: u'Binary', + 1: u'Octal', + 2: u'Decimal', + 3: u'Hexadecimal', + 4: u'String'} + +# BSM identification errors. 
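+# Maps a BSM error number to a human readable description; the lower
+# numbers largely mirror the POSIX errno values.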
+BSM_ERRORS = {
+    0: u'Success',
+    1: u'Operation not permitted',
+    2: u'No such file or directory',
+    3: u'No such process',
+    4: u'Interrupted system call',
+    5: u'Input/output error',
+    6: u'Device not configured',
+    7: u'Argument list too long',
+    8: u'Exec format error',
+    9: u'Bad file descriptor',
+    10: u'No child processes',
+    11: u'Resource temporarily unavailable',
+    12: u'Cannot allocate memory',
+    13: u'Permission denied',
+    14: u'Bad address',
+    15: u'Block device required',
+    16: u'Device busy',
+    17: u'File exists',
+    18: u'Cross-device link',
+    19: u'Operation not supported by device',
+    20: u'Not a directory',
+    21: u'Is a directory',
+    22: u'Invalid argument',
+    23: u'Too many open files in system',
+    24: u'Too many open files',
+    25: u'Inappropriate ioctl for device',
+    26: u'Text file busy',
+    27: u'File too large',
+    28: u'No space left on device',
+    29: u'Illegal seek',
+    30: u'Read-only file system',
+    31: u'Too many links',
+    32: u'Broken pipe',
+    33: u'Numerical argument out of domain',
+    34: u'Result too large',
+    35: u'No message of desired type',
+    36: u'Identifier removed',
+    45: u'Resource deadlock avoided',
+    46: u'No locks available',
+    47: u'Operation canceled',
+    48: u'Operation not supported',
+    49: u'Disc quota exceeded',
+    66: u'Too many levels of remote in path',
+    67: u'Link has been severed',
+    71: u'Protocol error',
+    74: u'Multihop attempted',
+    77: u'Bad message',
+    78: u'File name too long',
+    79: u'Value too large to be stored in data type',
+    88: u'Illegal byte sequence',
+    89: u'Function not implemented',
+    90: u'Too many levels of symbolic links',
+    91: u'Restart syscall',
+    93: u'Directory not empty',
+    94: u'Too many users',
+    95: u'Socket operation on non-socket',
+    96: u'Destination address required',
+    97: u'Message too long',
+    98: u'Protocol wrong type for socket',
+    99: u'Protocol not available',
+    120: u'Protocol not supported',
+    121: u'Socket type not supported',
+    122: u'Operation not supported',
+    123: u'Protocol family not supported',
+    124: u'Address family not supported by protocol family',
+    125: u'Address already in use',
+    126: u'Can\'t assign requested address',
+    127: u'Network is down',
+    128: u'Network unreachable',
+    129: u'Network dropped connection on reset',
+    130: u'Software caused connection abort',
+    131: u'Connection reset by peer',
+    132: u'No buffer space available',
+    133: u'Socket is already connected',
+    134: u'Socket is not connected',
+    143: u'Can\'t send after socket shutdown',
+    144: u'Too many references: can\'t splice',
+    145: u'Operation timed out',
+    146: u'Connection refused',
+    147: u'Host is down',
+    148: u'No route to host',
+    149: u'Operation already in progress',
+    150: u'Operation now in progress',
+    151: u'Stale NFS file handle',
+    190: u'PROCLIM',
+    191: u'BADRPC',
+    192: u'RPCMISMATCH',
+    193: u'PROGUNAVAIL',
+    194: u'PROGMISMATCH',
+    195: u'PROCUNAVAIL',
+    196: u'FTYPE',
+    197: u'AUTH',
+    198: u'NEEDAUTH',
+    199: u'NOATTR',
+    200: u'DOOFUS',
+    201: u'USTRETURN',
+    202: u'NOIOCTL',
+    203: u'DIRIOCTL',
+    204: u'PWROFF',
+    205: u'DEVERR',
+    206: u'BADEXEC',
+    207: u'BADARCH',
+    208: u'SHLIBVERS',
+    209: u'BADMACHO',
+    210: u'POLICY'}
+
+# BSM network protocols. The information comes from the OpenBSD project;
+# it might not be exact.
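+# (the lower numbers appear to follow the Solaris socket address family
+# (AF_*) constants, for example 2 is AF_INET and 26 is AF_INET6.)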
+BSM_PROTOCOLS = { + 0: u'UNSPEC', + 1: u'LOCAL', + 2: u'INET', + 3: u'IMPLINK', + 4: u'PUP', + 5: u'CHAOS', + 6: u'NS', + 8: u'ECMA', + 9: u'DATAKIT', + 10: u'CCITT', + 11: u'SNA', + 12: u'DECnet', + 13: u'DLI', + 14: u'LAT', + 15: u'HYLINK', + 16: u'APPLETALK', + 19: u'OSI', + 23: u'IPX', + 24: u'ROUTE', + 25: u'LINK', + 26: u'INET6', + 27: u'KEY', + 500: u'NETBIOS', + 501: u'ISO', + 502: u'XTP', + 503: u'COIP', + 504: u'CNT', + 505: u'RTIP', + 506: u'SIP', + 507: u'PIP', + 508: u'ISDN', + 509: u'E164', + 510: u'NATM', + 511: u'ATM', + 512: u'NETGRAPH', + 513: u'SLOW', + 514: u'CLUSTER', + 515: u'ARP', + 516: u'BLUETOOTH'} + +# key event types. +BSM_AUDIT_EVENT = { + 0: u'indir system call', + 1: u'exit(2)', + 2: u'fork(2)', + 3: u'open(2) - attr only', + 4: u'creat(2)', + 5: u'link(2)', + 6: u'unlink(2)', + 7: u'exec(2)', + 8: u'chdir(2)', + 9: u'mknod(2)', + 10: u'chmod(2)', + 11: u'chown(2)', + 12: u'umount(2) - old version', + 13: u'junk', + 14: u'access(2)', + 15: u'kill(2)', + 16: u'stat(2)', + 17: u'lstat(2)', + 18: u'acct(2)', + 19: u'mctl(2)', + 20: u'reboot(2)', + 21: u'symlink(2)', + 22: u'readlink(2)', + 23: u'execve(2)', + 24: u'chroot(2)', + 25: u'vfork(2)', + 26: u'setgroups(2)', + 27: u'setpgrp(2)', + 28: u'swapon(2)', + 29: u'sethostname(2)', + 30: u'fcntl(2)', + 31: u'setpriority(2)', + 32: u'connect(2)', + 33: u'accept(2)', + 34: u'bind(2)', + 35: u'setsockopt(2)', + 36: u'vtrace(2)', + 37: u'settimeofday(2)', + 38: u'fchown(2)', + 39: u'fchmod(2)', + 40: u'setreuid(2)', + 41: u'setregid(2)', + 42: u'rename(2)', + 43: u'truncate(2)', + 44: u'ftruncate(2)', + 45: u'flock(2)', + 46: u'shutdown(2)', + 47: u'mkdir(2)', + 48: u'rmdir(2)', + 49: u'utimes(2)', + 50: u'adjtime(2)', + 51: u'setrlimit(2)', + 52: u'killpg(2)', + 53: u'nfs_svc(2)', + 54: u'statfs(2)', + 55: u'fstatfs(2)', + 56: u'unmount(2)', + 57: u'async_daemon(2)', + 58: u'nfs_getfh(2)', + 59: u'setdomainname(2)', + 60: u'quotactl(2)', + 61: u'exportfs(2)', + 62: u'mount(2)', + 63: u'semsys(2)', + 64: u'msgsys(2)', + 65: u'shmsys(2)', + 66: u'bsmsys(2)', + 67: u'rfssys(2)', + 68: u'fchdir(2)', + 69: u'fchroot(2)', + 70: u'vpixsys(2)', + 71: u'pathconf(2)', + 72: u'open(2) - read', + 73: u'open(2) - read,creat', + 74: u'open(2) - read,trunc', + 75: u'open(2) - read,creat,trunc', + 76: u'open(2) - write', + 77: u'open(2) - write,creat', + 78: u'open(2) - write,trunc', + 79: u'open(2) - write,creat,trunc', + 80: u'open(2) - read,write', + 81: u'open(2) - read,write,creat', + 82: u'open(2) - read,write,trunc', + 83: u'open(2) - read,write,creat,trunc', + 84: u'msgctl(2) - illegal command', + 85: u'msgctl(2) - IPC_RMID command', + 86: u'msgctl(2) - IPC_SET command', + 87: u'msgctl(2) - IPC_STAT command', + 88: u'msgget(2)', + 89: u'msgrcv(2)', + 90: u'msgsnd(2)', + 91: u'shmctl(2) - illegal command', + 92: u'shmctl(2) - IPC_RMID command', + 93: u'shmctl(2) - IPC_SET command', + 94: u'shmctl(2) - IPC_STAT command', + 95: u'shmget(2)', + 96: u'shmat(2)', + 97: u'shmdt(2)', + 98: u'semctl(2) - illegal command', + 99: u'semctl(2) - IPC_RMID command', + 100: u'semctl(2) - IPC_SET command', + 101: u'semctl(2) - IPC_STAT command', + 102: u'semctl(2) - GETNCNT command', + 103: u'semctl(2) - GETPID command', + 104: u'semctl(2) - GETVAL command', + 105: u'semctl(2) - GETALL command', + 106: u'semctl(2) - GETZCNT command', + 107: u'semctl(2) - SETVAL command', + 108: u'semctl(2) - SETALL command', + 109: u'semget(2)', + 110: u'semop(2)', + 111: u'process dumped core', + 112: u'close(2)', + 113: u'system booted', + 114: 
u'async_daemon(2) exited',
+    115: u'nfssvc(2) exited',
+    128: u'writel(2)',
+    129: u'writevl(2)',
+    130: u'getauid(2)',
+    131: u'setauid(2)',
+    132: u'getaudit(2)',
+    133: u'setaudit(2)',
+    134: u'getuseraudit(2)',
+    135: u'setuseraudit(2)',
+    136: u'auditsvc(2)',
+    137: u'audituser(2)',
+    138: u'auditon(2)',
+    139: u'auditon(2) - GETTERMID command',
+    140: u'auditon(2) - SETTERMID command',
+    141: u'auditon(2) - GPOLICY command',
+    142: u'auditon(2) - SPOLICY command',
+    143: u'auditon(2) - GESTATE command',
+    144: u'auditon(2) - SESTATE command',
+    145: u'auditon(2) - GQCTRL command',
+    146: u'auditon(2) - SQCTRL command',
+    147: u'getkernstate(2)',
+    148: u'setkernstate(2)',
+    149: u'getportaudit(2)',
+    150: u'auditstat(2)',
+    151: u'revoke(2)',
+    152: u'Solaris AUE_MAC',
+    153: u'enter prom',
+    154: u'exit prom',
+    155: u'Solaris AUE_IFLOAT',
+    156: u'Solaris AUE_PFLOAT',
+    157: u'Solaris AUE_UPRIV',
+    158: u'ioctl(2)',
+    173: u'one-sided session record',
+    174: u'msggetl(2)',
+    175: u'msgrcvl(2)',
+    176: u'msgsndl(2)',
+    177: u'semgetl(2)',
+    178: u'shmgetl(2)',
+    183: u'socket(2)',
+    184: u'sendto(2)',
+    185: u'pipe(2)',
+    186: u'socketpair(2)',
+    187: u'send(2)',
+    188: u'sendmsg(2)',
+    189: u'recv(2)',
+    190: u'recvmsg(2)',
+    191: u'recvfrom(2)',
+    192: u'read(2)',
+    193: u'getdents(2)',
+    194: u'lseek(2)',
+    195: u'write(2)',
+    196: u'writev(2)',
+    197: u'nfs server',
+    198: u'readv(2)',
+    199: u'Solaris old stat(2)',
+    200: u'setuid(2)',
+    201: u'old stime(2)',
+    202: u'old utime(2)',
+    203: u'old nice(2)',
+    204: u'Solaris old setpgrp(2)',
+    205: u'setgid(2)',
+    206: u'readl(2)',
+    207: u'readvl(2)',
+    208: u'fstat(2)',
+    209: u'dup2(2)',
+    210: u'mmap(2)',
+    211: u'audit(2)',
+    212: u'Solaris priocntlsys(2)',
+    213: u'munmap(2)',
+    214: u'setegid(2)',
+    215: u'seteuid(2)',
+    216: u'putmsg(2)',
+    217: u'getmsg(2)',
+    218: u'putpmsg(2)',
+    219: u'getpmsg(2)',
+    220: u'audit system calls place holder',
+    221: u'auditon(2) - get kernel mask',
+    222: u'auditon(2) - set kernel mask',
+    223: u'auditon(2) - get cwd',
+    224: u'auditon(2) - get car',
+    225: u'auditon(2) - get audit statistics',
+    226: u'auditon(2) - reset audit statistics',
+    227: u'auditon(2) - set mask per uid',
+    228: u'auditon(2) - set mask per session ID',
+    229: u'auditon(2) - get audit state',
+    230: u'auditon(2) - set audit state',
+    231: u'auditon(2) - get event class',
+    232: u'auditon(2) - set event class',
+    233: u'utssys(2) - fusers',
+    234: u'statvfs(2)',
+    235: u'xstat(2)',
+    236: u'lxstat(2)',
+    237: u'lchown(2)',
+    238: u'memcntl(2)',
+    239: u'sysinfo(2)',
+    240: u'xmknod(2)',
+    241: u'fork1(2)',
+    242: u'modctl(2) system call place holder',
+    243: u'modctl(2) - load module',
+    244: u'modctl(2) - unload module',
+    245: u'modctl(2) - configure module',
+    246: u'modctl(2) - bind module',
+    247: u'getmsg-accept',
+    248: u'putmsg-connect',
+    249: u'putmsg-send',
+    250: u'getmsg-receive',
+    251: u'acl(2) - SETACL command',
+    252: u'facl(2) - SETACL command',
+    253: u'doorfs(2) - system call place holder',
+    254: u'doorfs(2) - DOOR_CALL',
+    255: u'doorfs(2) - DOOR_RETURN',
+    256: u'doorfs(2) - DOOR_CREATE',
+    257: u'doorfs(2) - DOOR_REVOKE',
+    258: u'doorfs(2) - DOOR_INFO',
+    259: u'doorfs(2) - DOOR_CRED',
+    260: u'doorfs(2) - DOOR_BIND',
+    261: u'doorfs(2) - DOOR_UNBIND',
+    262: u'p_online(2)',
+    263: u'processor_bind(2)',
+    264: u'inst_sync(2)',
+    265: u'configure socket',
+    266: u'setaudit_addr(2)',
+    267: u'getaudit_addr(2)',
+    268: u'Solaris umount(2)',
+    269: u'fsat(2) - place holder',
+    270: u'openat(2)
- read', + 271: u'openat(2) - read,creat', + 272: u'openat(2) - read,trunc', + 273: u'openat(2) - read,creat,trunc', + 274: u'openat(2) - write', + 275: u'openat(2) - write,creat', + 276: u'openat(2) - write,trunc', + 277: u'openat(2) - write,creat,trunc', + 278: u'openat(2) - read,write', + 279: u'openat(2) - read,write,create', + 280: u'openat(2) - read,write,trunc', + 281: u'openat(2) - read,write,creat,trunc', + 282: u'renameat(2)', + 283: u'fstatat(2)', + 284: u'fchownat(2)', + 285: u'futimesat(2)', + 286: u'unlinkat(2)', + 287: u'clock_settime(2)', + 288: u'ntp_adjtime(2)', + 289: u'setppriv(2)', + 290: u'modctl(2) - configure device policy', + 291: u'modctl(2) - configure additional privilege', + 292: u'kernel cryptographic framework', + 293: u'configure kernel SSL', + 294: u'brandsys(2)', + 295: u'Add IPsec policy rule', + 296: u'Delete IPsec policy rule', + 297: u'Clone IPsec policy', + 298: u'Flip IPsec policy', + 299: u'Flush IPsec policy rules', + 300: u'Update IPsec algorithms', + 301: u'portfs', + 302: u'ptrace(2)', + 303: u'chflags(2)', + 304: u'fchflags(2)', + 305: u'profil(2)', + 306: u'ktrace(2)', + 307: u'setlogin(2)', + 308: u'reboot(2)', + 309: u'revoke(2)', + 310: u'umask(2)', + 311: u'mprotect(2)', + 312: u'setpriority(2)', + 313: u'settimeofday(2)', + 314: u'flock(2)', + 315: u'mkfifo(2)', + 316: u'poll(2)', + 317: u'socketpair(2)', + 318: u'futimes(2)', + 319: u'setsid(2)', + 320: u'setprivexec(2)', + 321: u'nfssvc(2)', + 322: u'getfh(2)', + 323: u'quotactl(2)', + 324: u'add_profil()', + 325: u'kdebug_trace()', + 326: u'fstat(2)', + 327: u'fpathconf(2)', + 328: u'getdirentries(2)', + 329: u'truncate(2)', + 330: u'ftruncate(2)', + 331: u'sysctl(3)', + 332: u'mlock(2)', + 333: u'munlock(2)', + 334: u'undelete(2)', + 335: u'getattrlist()', + 336: u'setattrlist()', + 337: u'getdirentriesattr()', + 338: u'exchangedata()', + 339: u'searchfs()', + 340: u'minherit(2)', + 341: u'semconfig()', + 342: u'sem_open(2)', + 343: u'sem_close(2)', + 344: u'sem_unlink(2)', + 345: u'shm_open(2)', + 346: u'shm_unlink(2)', + 347: u'load_shared_file()', + 348: u'reset_shared_file()', + 349: u'new_system_share_regions()', + 350: u'pthread_kill(2)', + 351: u'pthread_sigmask(2)', + 352: u'auditctl(2)', + 353: u'rfork(2)', + 354: u'lchmod(2)', + 355: u'swapoff(2)', + 356: u'init_process()', + 357: u'map_fd()', + 358: u'task_for_pid()', + 359: u'pid_for_task()', + 360: u'sysctl() - non-admin', + 361: u'copyfile()', + 43001: u'getfsstat(2)', + 43002: u'ptrace(2)', + 43003: u'chflags(2)', + 43004: u'fchflags(2)', + 43005: u'profil(2)', + 43006: u'ktrace(2)', + 43007: u'setlogin(2)', + 43008: u'revoke(2)', + 43009: u'umask(2)', + 43010: u'mprotect(2)', + 43011: u'mkfifo(2)', + 43012: u'poll(2)', + 43013: u'futimes(2)', + 43014: u'setsid(2)', + 43015: u'setprivexec(2)', + 43016: u'add_profil()', + 43017: u'kdebug_trace()', + 43018: u'fstat(2)', + 43019: u'fpathconf(2)', + 43020: u'getdirentries(2)', + 43021: u'sysctl(3)', + 43022: u'mlock(2)', + 43023: u'munlock(2)', + 43024: u'undelete(2)', + 43025: u'getattrlist()', + 43026: u'setattrlist()', + 43027: u'getdirentriesattr()', + 43028: u'exchangedata()', + 43029: u'searchfs()', + 43030: u'minherit(2)', + 43031: u'semconfig()', + 43032: u'sem_open(2)', + 43033: u'sem_close(2)', + 43034: u'sem_unlink(2)', + 43035: u'shm_open(2)', + 43036: u'shm_unlink(2)', + 43037: u'load_shared_file()', + 43038: u'reset_shared_file()', + 43039: u'new_system_share_regions()', + 43040: u'pthread_kill(2)', + 43041: u'pthread_sigmask(2)', + 43042: u'auditctl(2)', + 
43043: u'rfork(2)', + 43044: u'lchmod(2)', + 43045: u'swapoff(2)', + 43046: u'init_process()', + 43047: u'map_fd()', + 43048: u'task_for_pid()', + 43049: u'pid_for_task()', + 43050: u'sysctl() - non-admin', + 43051: u'copyfile(2)', + 43052: u'lutimes(2)', + 43053: u'lchflags(2)', + 43054: u'sendfile(2)', + 43055: u'uselib(2)', + 43056: u'getresuid(2)', + 43057: u'setresuid(2)', + 43058: u'getresgid(2)', + 43059: u'setresgid(2)', + 43060: u'wait4(2)', + 43061: u'lgetfh(2)', + 43062: u'fhstatfs(2)', + 43063: u'fhopen(2)', + 43064: u'fhstat(2)', + 43065: u'jail(2)', + 43066: u'eaccess(2)', + 43067: u'kqueue(2)', + 43068: u'kevent(2)', + 43069: u'fsync(2)', + 43070: u'nmount(2)', + 43071: u'bdflush(2)', + 43072: u'setfsuid(2)', + 43073: u'setfsgid(2)', + 43074: u'personality(2)', + 43075: u'getscheduler(2)', + 43076: u'setscheduler(2)', + 43077: u'prctl(2)', + 43078: u'getcwd(2)', + 43079: u'capget(2)', + 43080: u'capset(2)', + 43081: u'pivot_root(2)', + 43082: u'rtprio(2)', + 43083: u'sched_getparam(2)', + 43084: u'sched_setparam(2)', + 43085: u'sched_get_priority_max(2)', + 43086: u'sched_get_priority_min(2)', + 43087: u'sched_rr_get_interval(2)', + 43088: u'acl_get_file(2)', + 43089: u'acl_set_file(2)', + 43090: u'acl_get_fd(2)', + 43091: u'acl_set_fd(2)', + 43092: u'acl_delete_file(2)', + 43093: u'acl_delete_fd(2)', + 43094: u'acl_aclcheck_file(2)', + 43095: u'acl_aclcheck_fd(2)', + 43096: u'acl_get_link(2)', + 43097: u'acl_set_link(2)', + 43098: u'acl_delete_link(2)', + 43099: u'acl_aclcheck_link(2)', + 43100: u'sysarch(2)', + 43101: u'extattrctl(2)', + 43102: u'extattr_get_file(2)', + 43103: u'extattr_set_file(2)', + 43104: u'extattr_list_file(2)', + 43105: u'extattr_delete_file(2)', + 43106: u'extattr_get_fd(2)', + 43107: u'extattr_set_fd(2)', + 43108: u'extattr_list_fd(2)', + 43109: u'extattr_delete_fd(2)', + 43110: u'extattr_get_link(2)', + 43111: u'extattr_set_link(2)', + 43112: u'extattr_list_link(2)', + 43113: u'extattr_delete_link(2)', + 43114: u'kenv(8)', + 43115: u'jail_attach(2)', + 43116: u'sysctl(3)', + 43117: u'linux ioperm', + 43118: u'readdir(3)', + 43119: u'linux iopl', + 43120: u'linux vm86', + 43121: u'mac_get_proc(2)', + 43122: u'mac_set_proc(2)', + 43123: u'mac_get_fd(2)', + 43124: u'mac_get_file(2)', + 43125: u'mac_set_fd(2)', + 43126: u'mac_set_file(2)', + 43127: u'mac_syscall(2)', + 43128: u'mac_get_pid(2)', + 43129: u'mac_get_link(2)', + 43130: u'mac_set_link(2)', + 43131: u'mac_execve(2)', + 43132: u'getpath_fromfd(2)', + 43133: u'getpath_fromaddr(2)', + 43134: u'mq_open(2)', + 43135: u'mq_setattr(2)', + 43136: u'mq_timedreceive(2)', + 43137: u'mq_timedsend(2)', + 43138: u'mq_notify(2)', + 43139: u'mq_unlink(2)', + 43140: u'listen(2)', + 43141: u'mlockall(2)', + 43142: u'munlockall(2)', + 43143: u'closefrom(2)', + 43144: u'fexecve(2)', + 43145: u'faccessat(2)', + 43146: u'fchmodat(2)', + 43147: u'linkat(2)', + 43148: u'mkdirat(2)', + 43149: u'mkfifoat(2)', + 43150: u'mknodat(2)', + 43151: u'readlinkat(2)', + 43152: u'symlinkat(2)', + 43153: u'mac_getfsstat(2)', + 43154: u'mac_get_mount(2)', + 43155: u'mac_get_lcid(2)', + 43156: u'mac_get_lctx(2)', + 43157: u'mac_set_lctx(2)', + 43158: u'mac_mount(2)', + 43159: u'getlcid(2)', + 43160: u'setlcid(2)', + 43161: u'taskname_for_pid()', + 43162: u'access_extended(2)', + 43163: u'chmod_extended(2)', + 43164: u'fchmod_extended(2)', + 43165: u'fstat_extended(2)', + 43166: u'lstat_extended(2)', + 43167: u'mkdir_extended(2)', + 43168: u'mkfifo_extended(2)', + 43169: u'open_extended(2) - attr only', + 43170: 
u'open_extended(2) - read', + 43171: u'open_extended(2) - read,creat', + 43172: u'open_extended(2) - read,trunc', + 43173: u'open_extended(2) - read,creat,trunc', + 43174: u'open_extended(2) - write', + 43175: u'open_extended(2) - write,creat', + 43176: u'open_extended(2) - write,trunc', + 43177: u'open_extended(2) - write,creat,trunc', + 43178: u'open_extended(2) - read,write', + 43179: u'open_extended(2) - read,write,creat', + 43180: u'open_extended(2) - read,write,trunc', + 43181: u'open_extended(2) - read,write,creat,trunc', + 43182: u'stat_extended(2)', + 43183: u'umask_extended(2)', + 43184: u'openat(2) - attr only', + 43185: u'posix_openpt(2)', + 43186: u'cap_new(2)', + 43187: u'cap_getrights(2)', + 43188: u'cap_enter(2)', + 43189: u'cap_getmode(2)', + 43190: u'posix_spawn(2)', + 43191: u'fsgetpath(2)', + 43192: u'pread(2)', + 43193: u'pwrite(2)', + 43194: u'fsctl()', + 43195: u'ffsctl()', + 43196: u'lpathconf(2)', + 43197: u'pdfork(2)', + 43198: u'pdkill(2)', + 43199: u'pdgetpid(2)', + 43200: u'pdwait(2)', + 44901: u'session start', + 44902: u'session update', + 44903: u'session end', + 44904: u'session close', + 6144: u'at-create atjob', + 6145: u'at-delete atjob (at or atrm)', + 6146: u'at-permission', + 6147: u'cron-invoke', + 6148: u'crontab-crontab created', + 6149: u'crontab-crontab deleted', + 6150: u'crontab-permission', + 6151: u'inetd connection', + 6152: u'login - local', + 6153: u'logout - local', + 6154: u'login - telnet', + 6155: u'login - rlogin', + 6156: u'mount', + 6157: u'unmount', + 6158: u'rsh access', + 6159: u'su(1)', + 6160: u'system halt', + 6161: u'system reboot', + 6162: u'rexecd', + 6163: u'passwd', + 6164: u'rexd', + 6165: u'ftp access', + 6166: u'init', + 6167: u'uadmin', + 6168: u'system shutdown', + 6170: u'crontab-modify', + 6171: u'ftp logout', + 6172: u'login - ssh', + 6173: u'role login', + 6180: u' profile command', + 6181: u'add filesystem', + 6182: u'delete filesystem', + 6183: u'modify filesystem', + 6200: u'allocate-device success', + 6201: u'allocate-device failure', + 6202: u'deallocate-device success', + 6203: u'deallocate-device failure', + 6204: u'allocate-list devices success', + 6205: u'allocate-list devices failure', + 6207: u'create user', + 6208: u'modify user', + 6209: u'delete user', + 6210: u'disable user', + 6211: u'enable user', + 6212: u'newgrp login', + 6213: u'admin login', + 6214: u'authenticated kadmind request', + 6215: u'unauthenticated kadmind req', + 6216: u'kdc authentication svc request', + 6217: u'kdc tkt-grant svc request', + 6218: u'kdc tgs 2ndtkt mismtch', + 6219: u'kdc tgs issue alt tgt', + 6300: u'sudo(1)', + 6501: u'modify password', + 6511: u'create group', + 6512: u'delete group', + 6513: u'modify group', + 6514: u'add to group', + 6515: u'remove from group', + 6521: u'revoke object priv', + 6600: u'loginwindow login', + 6601: u'loginwindow logout', + 7000: u'user authentication', + 7001: u'SecSrvr connection setup', + 7002: u'SecSrvr AuthEngine', + 7003: u'SecSrvr authinternal mech', + 32800: u'OpenSSH login', + 45000: u'audit startup', + 45001: u'audit shutdown', + 45014: u'modify password', + 45015: u'create group', + 45016: u'delete group', + 45017: u'modify group', + 45018: u'add to group', + 45019: u'remove from group', + 45020: u'revoke object priv', + 45021: u'loginwindow login', + 45022: u'loginwindow logout', + 45023: u'user authentication', + 45024: u'SecSrvr connection setup', + 45025: u'SecSrvr AuthEngine', + 45026: u'SecSrvr authinternal mech', + 45027: u'Calife', + 45028: u'sudo(1)', + 45029: 
u'audit crash recovery',
+    45030: u'SecSrvr AuthMechanism',
+    45031: u'Security Assessment'
+}
diff --git a/plaso/winnt/__init__.py b/plaso/winnt/__init__.py
new file mode 100644
index 0000000..ae78399
--- /dev/null
+++ b/plaso/winnt/__init__.py
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/plaso/winnt/environ_expand.py b/plaso/winnt/environ_expand.py
new file mode 100644
index 0000000..4cd2ac0
--- /dev/null
+++ b/plaso/winnt/environ_expand.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a method to expand Windows environment variables."""
+
+import re
+
+
+# TODO: Remove this file once we have a better replacement for it, either
+# by using the artifact library or dfVFS, since this functionality is part
+# of both of those libraries.
+
+# Taken from: https://code.google.com/p/grr/source/browse/lib/artifact_lib.py
+def ExpandWindowsEnvironmentVariables(data_string, pre_obj):
+  """Take a string and expand any Windows environment variables.
+
+  Args:
+    data_string: A string, e.g. "%SystemRoot%\\LogFiles".
+    pre_obj: A pre-process object.
+
+  Returns:
+    A string with available environment variables expanded.
+  """
+  win_environ_regex = re.compile(r'%([^%]+?)%')
+  components = []
+  offset = 0
+  for match in win_environ_regex.finditer(data_string):
+    components.append(data_string[offset:match.start()])
+
+    kb_value = getattr(
+        pre_obj, match.group(1).lower(), None)
+    if isinstance(kb_value, basestring) and kb_value:
+      components.append(kb_value)
+    else:
+      # Leave the variable unexpanded, e.g. "%SystemRoot%". Note that "%" is
+      # a literal character in a str.format() format string, so it must not
+      # be doubled here.
+      components.append(u'%{0:s}%'.format(match.group(1)))
+    offset = match.end()
+  components.append(data_string[offset:])  # Append the final chunk.
+  return u''.join(components)
diff --git a/plaso/winnt/human_readable_service_enums.py b/plaso/winnt/human_readable_service_enums.py
new file mode 100644
index 0000000..b4b9922
--- /dev/null
+++ b/plaso/winnt/human_readable_service_enums.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2014 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains constants for making service keys more readable.""" + +SERVICE_ENUMS = { + # Human readable strings for the service type. + 'Type': { + 1: 'Kernel Device Driver (0x1)', + 2: 'File System Driver (0x2)', + 4: 'Adapter (0x4)', + 16: 'Service - Own Process (0x10)', + 32: 'Service - Share Process (0x20)' + }, + # Human readable strings for the service start type. + 'Start': { + 0: 'Boot (0)', + 1: 'System (1)', + 2: 'Auto Start (2)', + 3: 'Manual (3)', + 4: 'Disabled (4)' + }, + # Human readable strings for the error handling. + 'ErrorControl': { + 0: 'Ignore (0)', + 1: 'Normal (1)', + 2: 'Severe (2)', + 3: 'Critical (3)' + } +} diff --git a/plaso/winnt/known_folder_ids.py b/plaso/winnt/known_folder_ids.py new file mode 100644 index 0000000..d8b5247 --- /dev/null +++ b/plaso/winnt/known_folder_ids.py @@ -0,0 +1,270 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Windows NT Known Folder identifier definitions.""" + +# For now ignore the line too long errors. +# pylint: disable=line-too-long + +# For now copied from: +# https://code.google.com/p/libfwsi/wiki/KnownFolderIdentifiers + +# TODO: store these in a database or equiv. 
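The two dictionaries that follow, DESCRIPTIONS and PATHS, map known folder GUIDs (stored lowercase and without enclosing braces) to a human readable name and to a typical, environment-variable based location. A minimal sketch of a lookup helper over these tables follows; the function name and the GUID normalization are illustrative assumptions, not part of this module:

def LookupKnownFolder(guid_string):
  """Returns a (description, path) tuple for a known folder GUID.

  Args:
    guid_string: A GUID string, e.g.
                 u'{374DE290-123F-4565-9164-39C4925E467B}'.

  Returns:
    A tuple of a description string and a path string. Either can be None
    for an unknown GUID, and the path can be an empty string for virtual
    folders, such as Computer, that have no file system equivalent.
  """
  # The tables defined below key on lowercase GUIDs without braces.
  lookup_key = guid_string.lower().strip(u'{}')
  return DESCRIPTIONS.get(lookup_key, None), PATHS.get(lookup_key, None)

For the example GUID above this would return (u'Downloads', u'%USERPROFILE%\\Downloads').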
+ +DESCRIPTIONS = { + u'008ca0b1-55b4-4c56-b8a8-4de4b299d3be': u'Account Pictures', + u'00bcfc5a-ed94-4e48-96a1-3f6217f21990': u'Roaming Tiles', + u'0139d44e-6afe-49f2-8690-3dafcae6ffb8': u'(Common) Programs', + u'0482af6c-08f1-4c34-8c90-e17ec98b1e17': u'Public Account Pictures', + u'054fae61-4dd8-4787-80b6-090220c4b700': u'Game Explorer (Game Tasks)', + u'0762d272-c50a-4bb0-a382-697dcd729b80': u'Users (User Profiles)', + u'0ac0837c-bbf8-452a-850d-79d08e667ca7': u'Computer (My Computer)', + u'0d4c3db6-03a3-462f-a0e6-08924c41b5d4': u'History', + u'0f214138-b1d3-4a90-bba9-27cbc0c5389a': u'Sync Setup', + u'15ca69b3-30ee-49c1-ace1-6b5ec372afb5': u'Sample Playlists', + u'1777f761-68ad-4d8a-87bd-30b759fa33dd': u'Favorites', + u'18989b1d-99b5-455b-841c-ab7c74e4ddfc': u'Videos (My Video)', + u'190337d1-b8ca-4121-a639-6d472d16972a': u'Search Results (Search Home)', + u'1a6fdba2-f42d-4358-a798-b74d745926c5': u'Recorded TV', + u'1ac14e77-02e7-4e5d-b744-2eb1ae5198b7': u'System32 (System)', + u'1b3ea5dc-b587-4786-b4ef-bd1dc332aeae': u'Libraries', + u'1e87508d-89c2-42f0-8a7e-645a0f50ca58': u'Applications', + u'2112ab0a-c86a-4ffe-a368-0de96e47012e': u'Music', + u'2400183a-6185-49fb-a2d8-4a392a602ba3': u'Public Videos (Common Video)', + u'24d89e24-2f19-4534-9dde-6a6671fbb8fe': u'One Drive Documents', + u'289a9a43-be44-4057-a41b-587a76d7e7f9': u'Sync Results', + u'2a00375e-224c-49de-b8d1-440df7ef3ddc': u'Localized Resources (Directory)', + u'2b0f765d-c0e9-4171-908e-08a611b84ff6': u'Cookies', + u'2c36c0aa-5812-4b87-bfd0-4cd0dfb19b39': u'Original Images', + u'3214fab5-9757-4298-bb61-92a9deaa44ff': u'Public Music (Common Music)', + u'339719b5-8c47-4894-94c2-d8f77add44a6': u'One Drive Pictures', + u'33e28130-4e1e-4676-835a-98395c3bc3bb': u'Pictures (My Pictures)', + u'352481e8-33be-4251-ba85-6007caedcf9d': u'Internet Cache (Temporary Internet Files)', + u'374de290-123f-4565-9164-39c4925e467b': u'Downloads', + u'3d644c9b-1fb8-4f30-9b45-f670235f79c0': u'Public Downloads (Common Downloads)', + u'3eb685db-65f9-4cf6-a03a-e3ef65729f3d': u'Roaming Application Data (Roaming)', + u'43668bf8-c14e-49b2-97c9-747784d784b7': u'Sync Center (Sync Manager)', + u'48daf80b-e6cf-4f4e-b800-0e69d84ee384': u'Libraries', + u'491e922f-5643-4af4-a7eb-4e7a138d8174': u'Videos', + u'4bd8d571-6d19-48d3-be97-422220080e43': u'Music (My Music)', + u'4bfefb45-347d-4006-a5be-ac0cb0567192': u'Conflicts', + u'4c5c32ff-bb9d-43b0-b5b4-2d72e54eaaa4': u'Saved Games', + u'4d9f7874-4e0c-4904-967b-40b0d20c3e4b': u'Internet (The Internet)', + u'52528a6b-b9e3-4add-b60d-588c2dba842d': u'Homegroup', + u'52a4f021-7b75-48a9-9f6b-4b87a210bc8f': u'Quick Launch', + u'56784854-c6cb-462b-8169-88e350acb882': u'Contacts', + u'5b3749ad-b49f-49c1-83eb-15370fbd4882': u'Tree Properties', + u'5cd7aee2-2219-4a67-b85d-6c9ce15660cb': u'Programs', + u'5ce4a5e9-e4eb-479d-b89f-130c02886155': u'Device Metadata Store', + u'5e6c858f-0e22-4760-9afe-ea3317b67173': u'Profile (User\'s name)', + u'625b53c3-ab48-4ec1-ba1f-a1ef4146fc19': u'Start Menu', + u'62ab5d82-fdc1-4dc3-a9dd-070d1d495d97': u'Program Data', + u'6365d5a7-0f0d-45e5-87f6-0da56b6a4f7d': u'Common Files (x64)', + u'69d2cf90-fc33-4fb7-9a0c-ebb0f0fcb43c': u'Slide Shows (Photo Albums)', + u'6d809377-6af0-444b-8957-a3773f02200e': u'Program Files (x64)', + u'6f0cd92b-2e97-45d1-88ff-b0d186b8dedd': u'Network Connections', + u'724ef170-a42d-4fef-9f26-b60e846fba4f': u'Administrative Tools', + u'767e6811-49cb-4273-87c2-20f355e1085b': u'One Drive Camera Roll', + u'76fc4e2d-d6ad-4519-a663-37bd56068185': u'Printers', + 
u'7b0db17d-9cd2-4a93-9733-46cc89022e7c': u'Documents', + u'7b396e54-9ec5-4300-be0a-2482ebae1a26': u'Default Gadgets (Sidebar Default Parts)', + u'7c5a40ef-a0fb-4bfc-874a-c0f2e0b9fa8e': u'Program Files (x86)', + u'7d1d3a04-debb-4115-95cf-2f29da2920da': u'Saved Searches (Searches)', + u'7e636bfe-dfa9-4d5e-b456-d7b39851d8a9': u'Templates', + u'82a5ea35-d9cd-47c5-9629-e15d2f714e6e': u'(Common) Startup', + u'82a74aeb-aeb4-465c-a014-d097ee346d63': u'Control Panel', + u'859ead94-2e85-48ad-a71a-0969cb56a6cd': u'Sample Videos', + u'8983036c-27c0-404b-8f08-102d10dcfd74': u'Send To', + u'8ad10c31-2adb-4296-a8f7-e4701232c972': u'Resources (Resources Directory)', + u'905e63b6-c1bf-494e-b29c-65b732d3d21a': u'Program Files', + u'9274bd8d-cfd1-41c3-b35e-b13f55a758f4': u'Printer Shortcuts (PrintHood)', + u'98ec0e18-2098-4d44-8644-66979315a281': u'Microsoft Office Outlook (MAPI)', + u'9b74b6a3-0dfd-4f11-9e78-5f7800f2e772': u'User\'s name', + u'9e3995ab-1f9c-4f13-b827-48b24b6c7174': u'User Pinned', + u'9e52ab10-f80d-49df-acb8-4330f5687855': u'Temporary Burn Folder (CD Burning)', + u'a302545d-deff-464b-abe8-61c8648d939b': u'Libraries', + u'a305ce99-f527-492b-8b1a-7e76fa98d6e4': u'Installed Updates (Application Updates)', + u'a3918781-e5f2-4890-b3d9-a7e54332328c': u'Application Shortcuts', + u'a4115719-d62e-491d-aa7c-e74b8be3b067': u'(Common) Start Menu', + u'a520a1a4-1780-4ff6-bd18-167343c5af16': u'Local Application Data Low (Local Low)', + u'a52bba46-e9e1-435f-b3d9-28daa648c0f6': u'One Drive', + u'a63293e8-664e-48db-a079-df759e0509f7': u'Templates', + u'a75d362e-50fc-4fb7-ac2c-a8beaa314493': u'Gadgets (Sidebar Parts)', + u'a77f5d77-2e2b-44c3-a6a2-aba601054a51': u'Programs', + u'a990ae9f-a03b-4e80-94bc-9912d7504104': u'Pictures', + u'aaa8d5a5-f1d6-4259-baa8-78e7ef60835e': u'Roamed Tile Images', + u'ab5fb87b-7ce2-4f83-915d-550846c9537b': u'Camera Roll', + u'ae50c081-ebd2-438a-8655-8a092e34987a': u'Recent (Recent Items)', + u'b250c668-f57d-4ee1-a63c-290ee7d1aa1f': u'Sample Music', + u'b4bfcc3a-db2c-424c-b029-7fe99a87c641': u'Desktop', + u'b6ebfb86-6907-413c-9af7-4fc2abf07cc5': u'Public Pictures (Common Pictures)', + u'b7534046-3ecb-4c18-be4e-64cd4cb7d6ac': u'Recycle Bin (Bit Bucket)', + u'b7bede81-df94-4682-a7d8-57a52620b86f': u'Screenshots', + u'b94237e7-57ac-4347-9151-b08c6c32d1f7': u'(Common) Templates', + u'b97d20bb-f46a-4c97-ba10-5e3608430854': u'Startup', + u'bcb5256f-79f6-4cee-b725-dc34e402fd46': u'Implicit Application Shortcuts', + u'bcbd3057-ca5c-4622-b42d-bc56db0ae516': u'Programs', + u'bd85e001-112e-431e-983b-7b15ac09fff1': u'Recorded TV', + u'bfb9d5e0-c6a9-404c-b2b2-ae6db6af4968': u'Links', + u'c1bae2d0-10df-4334-bedd-7aa20b227a9d': u'(Common) OEM Links', + u'c4900540-2379-4c75-844b-64e6faf8716b': u'Sample Pictures', + u'c4aa340d-f20f-4863-afef-f87ef2e6ba25': u'Public Desktop (Common Desktop)', + u'c5abbf53-e17f-4121-8900-86626fc2c973': u'Network Shortcuts (NetHood)', + u'c870044b-f49e-4126-a9c3-b52a1ff411e8': u'Ringtones', + u'cac52c1a-b53d-4edc-92d7-6b2e8ac19434': u'Games', + u'd0384e7d-bac3-4797-8f14-cba229b392b5': u'(Common) Administrative Tools', + u'd20beec4-5ca8-4905-ae3b-bf251ea09b53': u'Network (Places)', + u'd65231b0-b2f1-4857-a4ce-a8e7c6ea7d27': u'System32 (x86)', + u'd9dc8a3b-b784-432e-a781-5a1130a75963': u'History', + u'de61d971-5ebc-4f02-a3a9-6c82895e5c04': u'Add New Programs (Get Programs)', + u'de92c1c7-837f-4f69-a3bb-86e631204a23': u'Playlists', + u'de974d24-d9c6-4d3e-bf91-f4455120b917': u'Common Files (x86)', + u'debf2536-e1a8-4c59-b6a2-414586476aea': u'Game Explorer (Public Game 
Tasks)', + u'df7266ac-9274-4867-8d55-3bd661de872d': u'Programs and Features (Change and Remove Programs)', + u'dfdf76a2-c82a-4d63-906a-5644ac457385': u'Public', + u'e555ab60-153b-4d17-9f04-a5fe99fc15ec': u'Ringtones', + u'ed4824af-dce4-45a8-81e2-fc7965083634': u'Public Documents (Common Documents)', + u'ee32e446-31ca-4aba-814f-a5ebd2fd6d5e': u'Offline Files (CSC)', + u'f1b32785-6fba-4fcf-9d55-7b8e7f157091': u'Local Application Data', + u'f38bf404-1d43-42f2-9305-67de0b28fc23': u'Windows', + u'f3ce0f7c-4901-4acc-8648-d5d44b04ef8f': u'User\'s Files', + u'f7f1ed05-9f6d-47a2-aaae-29d317c6f066': u'Common Files', + u'fd228cb7-ae11-4ae3-864c-16f3910ab8fe': u'Fonts', + u'fdd39ad0-238f-46af-adb4-6c85480369c7': u'Documents (Personal)', +} + +PATHS = { + u'008ca0b1-55b4-4c56-b8a8-4de4b299d3be': u'%APPDATA%\\Microsoft\\Windows\\AccountPictures', + u'00bcfc5a-ed94-4e48-96a1-3f6217f21990': u'%LOCALAPPDATA%\\Microsoft\\Windows\\RoamingTiles', + u'0139d44e-6afe-49f2-8690-3dafcae6ffb8': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Start Menu\\Programs', + u'0482af6c-08f1-4c34-8c90-e17ec98b1e17': u'%PUBLIC%\\AccountPictures', + u'054fae61-4dd8-4787-80b6-090220c4b700': u'%LOCALAPPDATA%\\Microsoft\\Windows\\GameExplorer', + u'0762d272-c50a-4bb0-a382-697dcd729b80': u'%SYSTEMDRIVE%\\Users', + u'0ac0837c-bbf8-452a-850d-79d08e667ca7': u'', + u'0d4c3db6-03a3-462f-a0e6-08924c41b5d4': u'%LOCALAPPDATA%\\Microsoft\\Windows\\ConnectedSearch\\History', + u'0f214138-b1d3-4a90-bba9-27cbc0c5389a': u'', + u'15ca69b3-30ee-49c1-ace1-6b5ec372afb5': u'%PUBLIC%\\Music\\Sample Playlists', + u'1777f761-68ad-4d8a-87bd-30b759fa33dd': u'%USERPROFILE%\\Favorites', + u'18989b1d-99b5-455b-841c-ab7c74e4ddfc': u'%USERPROFILE%\\Videos', + u'190337d1-b8ca-4121-a639-6d472d16972a': u'', + u'1a6fdba2-f42d-4358-a798-b74d745926c5': u'%PUBLIC%\\RecordedTV.library-ms', + u'1ac14e77-02e7-4e5d-b744-2eb1ae5198b7': u'%WINDIR%\\System32', + u'1b3ea5dc-b587-4786-b4ef-bd1dc332aeae': u'%APPDATA%\\Microsoft\\Windows\\Libraries', + u'1e87508d-89c2-42f0-8a7e-645a0f50ca58': u'', + u'2112ab0a-c86a-4ffe-a368-0de96e47012e': u'%APPDATA%\\Microsoft\\Windows\\Libraries\\Music.library-ms', + u'2400183a-6185-49fb-a2d8-4a392a602ba3': u'%PUBLIC%\\Videos', + u'24d89e24-2f19-4534-9dde-6a6671fbb8fe': u'%USERPROFILE%\\OneDrive\\Documents', + u'289a9a43-be44-4057-a41b-587a76d7e7f9': u'', + u'2a00375e-224c-49de-b8d1-440df7ef3ddc': u'%WINDIR%\\resources\\%CODEPAGE%', + u'2b0f765d-c0e9-4171-908e-08a611b84ff6': u'%APPDATA%\\Microsoft\\Windows\\Cookies', + u'2c36c0aa-5812-4b87-bfd0-4cd0dfb19b39': u'%LOCALAPPDATA%\\Microsoft\\Windows Photo Gallery\\Original Images', + u'3214fab5-9757-4298-bb61-92a9deaa44ff': u'%PUBLIC%\\Music', + u'339719b5-8c47-4894-94c2-d8f77add44a6': u'%USERPROFILE%\\OneDrive\\Pictures', + u'33e28130-4e1e-4676-835a-98395c3bc3bb': u'%USERPROFILE%\\Pictures', + u'352481e8-33be-4251-ba85-6007caedcf9d': u'%LOCALAPPDATA%\\Microsoft\\Windows\\Temporary Internet Files', + u'374de290-123f-4565-9164-39c4925e467b': u'%USERPROFILE%\\Downloads', + u'3d644c9b-1fb8-4f30-9b45-f670235f79c0': u'%PUBLIC%\\Downloads', + u'3eb685db-65f9-4cf6-a03a-e3ef65729f3d': u'%USERPROFILE%\\AppData\\Roaming', + u'43668bf8-c14e-49b2-97c9-747784d784b7': u'', + u'48daf80b-e6cf-4f4e-b800-0e69d84ee384': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Libraries', + u'491e922f-5643-4af4-a7eb-4e7a138d8174': u'%APPDATA%\\Microsoft\\Windows\\Libraries\\Videos.library-ms', + u'4bd8d571-6d19-48d3-be97-422220080e43': u'%USERPROFILE%\\Music', + u'4bfefb45-347d-4006-a5be-ac0cb0567192': u'', + 
u'4c5c32ff-bb9d-43b0-b5b4-2d72e54eaaa4': u'%USERPROFILE%\\Saved Games', + u'4d9f7874-4e0c-4904-967b-40b0d20c3e4b': u'', + u'52528a6b-b9e3-4add-b60d-588c2dba842d': u'', + u'52a4f021-7b75-48a9-9f6b-4b87a210bc8f': u'%APPDATA%\\Microsoft\\Internet Explorer\\Quick Launch', + u'56784854-c6cb-462b-8169-88e350acb882': u'', + u'5b3749ad-b49f-49c1-83eb-15370fbd4882': u'', + u'5cd7aee2-2219-4a67-b85d-6c9ce15660cb': u'%LOCALAPPDATA%\\Programs', + u'5ce4a5e9-e4eb-479d-b89f-130c02886155': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\DeviceMetadataStore', + u'5e6c858f-0e22-4760-9afe-ea3317b67173': u'%SYSTEMDRIVE%\\Users\\%USERNAME%', + u'625b53c3-ab48-4ec1-ba1f-a1ef4146fc19': u'%APPDATA%\\Microsoft\\Windows\\Start Menu', + u'62ab5d82-fdc1-4dc3-a9dd-070d1d495d97': u'%SYSTEMDRIVE%\\ProgramData', + u'6365d5a7-0f0d-45e5-87f6-0da56b6a4f7d': u'%PROGRAMFILES%\\Common Files', + u'69d2cf90-fc33-4fb7-9a0c-ebb0f0fcb43c': u'%USERPROFILE%\\Pictures\\Slide Shows', + u'6d809377-6af0-444b-8957-a3773f02200e': u'%SYSTEMDRIVE%\\Program Files', + u'6f0cd92b-2e97-45d1-88ff-b0d186b8dedd': u'', + u'724ef170-a42d-4fef-9f26-b60e846fba4f': u'%APPDATA%\\Microsoft\\Windows\\Start Menu\\Programs\\Administrative Tools', + u'767e6811-49cb-4273-87c2-20f355e1085b': u'%USERPROFILE%\\OneDrive\\Pictures\\Camera Roll', + u'76fc4e2d-d6ad-4519-a663-37bd56068185': u'', + u'7b0db17d-9cd2-4a93-9733-46cc89022e7c': u'%APPDATA%\\Microsoft\\Windows\\Libraries\\Documents.library-ms', + u'7b396e54-9ec5-4300-be0a-2482ebae1a26': u'%PROGRAMFILES%\\Windows Sidebar\\Gadgets', + u'7c5a40ef-a0fb-4bfc-874a-c0f2e0b9fa8e': u'%PROGRAMFILES% (%SYSTEMDRIVE%\\Program Files)', + u'7d1d3a04-debb-4115-95cf-2f29da2920da': u'%USERPROFILE%\\Searches', + u'7e636bfe-dfa9-4d5e-b456-d7b39851d8a9': u'%LOCALAPPDATA%\\Microsoft\\Windows\\ConnectedSearch\\Templates', + u'82a5ea35-d9cd-47c5-9629-e15d2f714e6e': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Start Menu\\Programs\\StartUp', + u'82a74aeb-aeb4-465c-a014-d097ee346d63': u'', + u'859ead94-2e85-48ad-a71a-0969cb56a6cd': u'%PUBLIC%\\Videos\\Sample Videos', + u'8983036c-27c0-404b-8f08-102d10dcfd74': u'%APPDATA%\\Microsoft\\Windows\\SendTo', + u'8ad10c31-2adb-4296-a8f7-e4701232c972': u'%WINDIR%\\Resources', + u'905e63b6-c1bf-494e-b29c-65b732d3d21a': u'%SYSTEMDRIVE%\\Program Files', + u'9274bd8d-cfd1-41c3-b35e-b13f55a758f4': u'%APPDATA%\\Microsoft\\Windows\\Printer Shortcuts', + u'98ec0e18-2098-4d44-8644-66979315a281': u'', + u'9b74b6a3-0dfd-4f11-9e78-5f7800f2e772': u'', + u'9e3995ab-1f9c-4f13-b827-48b24b6c7174': u'%APPDATA%\\Microsoft\\Internet Explorer\\Quick Launch\\User Pinned', + u'9e52ab10-f80d-49df-acb8-4330f5687855': u'%LOCALAPPDATA%\\Microsoft\\Windows\\Burn\\Burn', + u'a302545d-deff-464b-abe8-61c8648d939b': u'', + u'a305ce99-f527-492b-8b1a-7e76fa98d6e4': u'', + u'a3918781-e5f2-4890-b3d9-a7e54332328c': u'%LOCALAPPDATA%\\Microsoft\\Windows\\Application Shortcuts', + u'a4115719-d62e-491d-aa7c-e74b8be3b067': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Start Menu', + u'a520a1a4-1780-4ff6-bd18-167343c5af16': u'%USERPROFILE%\\AppData\\LocalLow', + u'a52bba46-e9e1-435f-b3d9-28daa648c0f6': u'%USERPROFILE%\\OneDrive', + u'a63293e8-664e-48db-a079-df759e0509f7': u'%APPDATA%\\Microsoft\\Windows\\Templates', + u'a75d362e-50fc-4fb7-ac2c-a8beaa314493': u'%LOCALAPPDATA%\\Microsoft\\Windows Sidebar\\Gadgets', + u'a77f5d77-2e2b-44c3-a6a2-aba601054a51': u'%APPDATA%\\Microsoft\\Windows\\Start Menu\\Programs', + u'a990ae9f-a03b-4e80-94bc-9912d7504104': u'%APPDATA%\\Microsoft\\Windows\\Libraries\\Pictures.library-ms', + 
u'aaa8d5a5-f1d6-4259-baa8-78e7ef60835e': u'%LOCALAPPDATA%\\Microsoft\\Windows\\RoamedTileImages', + u'ab5fb87b-7ce2-4f83-915d-550846c9537b': u'%USERPROFILE%\\Pictures\\Camera Roll', + u'ae50c081-ebd2-438a-8655-8a092e34987a': u'%APPDATA%\\Microsoft\\Windows\\Recent', + u'b250c668-f57d-4ee1-a63c-290ee7d1aa1f': u'%PUBLIC%\\Music\\Sample Music', + u'b4bfcc3a-db2c-424c-b029-7fe99a87c641': u'%USERPROFILE%\\Desktop', + u'b6ebfb86-6907-413c-9af7-4fc2abf07cc5': u'%PUBLIC%\\Pictures', + u'b7534046-3ecb-4c18-be4e-64cd4cb7d6ac': u'', + u'b7bede81-df94-4682-a7d8-57a52620b86f': u'%USERPROFILE%\\Pictures\\Screenshots', + u'b94237e7-57ac-4347-9151-b08c6c32d1f7': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Templates', + u'b97d20bb-f46a-4c97-ba10-5e3608430854': u'%APPDATA%\\Microsoft\\Windows\\Start Menu\\Programs\\StartUp', + u'bcb5256f-79f6-4cee-b725-dc34e402fd46': u'%APPDATA%\\Microsoft\\Internet Explorer\\Quick Launch\\User Pinned\\ImplicitAppShortcuts', + u'bcbd3057-ca5c-4622-b42d-bc56db0ae516': u'%LOCALAPPDATA%\\Programs\\Common', + u'bd85e001-112e-431e-983b-7b15ac09fff1': u'', + u'bfb9d5e0-c6a9-404c-b2b2-ae6db6af4968': u'%USERPROFILE%\\Links', + u'c1bae2d0-10df-4334-bedd-7aa20b227a9d': u'%ALLUSERSPROFILE%\\OEM Links', + u'c4900540-2379-4c75-844b-64e6faf8716b': u'%PUBLIC%\\Pictures\\Sample Pictures', + u'c4aa340d-f20f-4863-afef-f87ef2e6ba25': u'%PUBLIC%\\Desktop', + u'c5abbf53-e17f-4121-8900-86626fc2c973': u'%APPDATA%\\Microsoft\\Windows\\Network Shortcuts', + u'c870044b-f49e-4126-a9c3-b52a1ff411e8': u'%LOCALAPPDATA%\\Microsoft\\Windows\\Ringtones', + u'cac52c1a-b53d-4edc-92d7-6b2e8ac19434': u'', + u'd0384e7d-bac3-4797-8f14-cba229b392b5': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Start Menu\\Programs\\Administrative Tools', + u'd20beec4-5ca8-4905-ae3b-bf251ea09b53': u'', + u'd65231b0-b2f1-4857-a4ce-a8e7c6ea7d27': u'%WINDIR%\\system32', + u'd9dc8a3b-b784-432e-a781-5a1130a75963': u'%LOCALAPPDATA%\\Microsoft\\Windows\\History', + u'de61d971-5ebc-4f02-a3a9-6c82895e5c04': u'', + u'de92c1c7-837f-4f69-a3bb-86e631204a23': u'%USERPROFILE%\\Music\\Playlists', + u'de974d24-d9c6-4d3e-bf91-f4455120b917': u'%PROGRAMFILES%\\Common Files', + u'debf2536-e1a8-4c59-b6a2-414586476aea': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\GameExplorer', + u'df7266ac-9274-4867-8d55-3bd661de872d': u'', + u'dfdf76a2-c82a-4d63-906a-5644ac457385': u'%SYSTEMDRIVE%\\Users\\Public', + u'e555ab60-153b-4d17-9f04-a5fe99fc15ec': u'%ALLUSERSPROFILE%\\Microsoft\\Windows\\Ringtones', + u'ed4824af-dce4-45a8-81e2-fc7965083634': u'%PUBLIC%\\Documents', + u'ee32e446-31ca-4aba-814f-a5ebd2fd6d5e': u'', + u'f1b32785-6fba-4fcf-9d55-7b8e7f157091': u'%USERPROFILE%\\AppData\\Local', + u'f38bf404-1d43-42f2-9305-67de0b28fc23': u'%WINDIR%', + u'f3ce0f7c-4901-4acc-8648-d5d44b04ef8f': u'', + u'f7f1ed05-9f6d-47a2-aaae-29d317c6f066': u'%PROGRAMFILES%\\Common Files', + u'fd228cb7-ae11-4ae3-864c-16f3910ab8fe': u'%WINDIR%\\Fonts', + u'fdd39ad0-238f-46af-adb4-6c85480369c7': u'%USERPROFILE%\\Documents', +} diff --git a/plaso/winnt/shell_folder_ids.py b/plaso/winnt/shell_folder_ids.py new file mode 100644 index 0000000..818da71 --- /dev/null +++ b/plaso/winnt/shell_folder_ids.py @@ -0,0 +1,204 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the Windows NT shell folder identifier definitions.""" + +# For now ignore the line too long errors. +# pylint: disable=line-too-long + +# For now copied from: +# https://code.google.com/p/libfwsi/wiki/ShellFolderIdentifiers + +# TODO: store these in a database or equiv. + +DESCRIPTIONS = { + u'00020d75-0000-0000-c000-000000000046': u'Inbox', + u'00020d76-0000-0000-c000-000000000046': u'Inbox', + u'00c6d95f-329c-409a-81d7-c46c66ea7f33': u'Default Location', + u'0142e4d0-fb7a-11dc-ba4a-000ffe7ab428': u'Biometric Devices (Biometrics)', + u'025a5937-a6be-4686-a844-36fe4bec8b6d': u'Power Options', + u'031e4825-7b94-4dc3-b131-e946b44c8dd5': u'Users Libraries', + u'04731b67-d933-450a-90e6-4acd2e9408fe': u'Search Folder', + u'05d7b0f4-2121-4eff-bf6b-ed3f69b894d9': u'Taskbar (Notification Area Icons)', + u'0afaced1-e828-11d1-9187-b532f1e9575d': u'Folder Shortcut', + u'0cd7a5c0-9f37-11ce-ae65-08002b2e1262': u'Cabinet File', + u'0df44eaa-ff21-4412-828e-260a8728e7f1': u'Taskbar and Start Menu', + u'11016101-e366-4d22-bc06-4ada335c892b': u'Internet Explorer History and Feeds Shell Data Source for Windows Search', + u'1206f5f1-0569-412c-8fec-3204630dfb70': u'Credential Manager', + u'13e7f612-f261-4391-bea2-39df4f3fa311': u'Windows Desktop Search', + u'15eae92e-f17a-4431-9f28-805e482dafd4': u'Install New Programs (Get Programs)', + u'1723d66a-7a12-443e-88c7-05e1bfe79983': u'Previous Versions Delegate Folder', + u'17cd9488-1228-4b2f-88ce-4298e93e0966': u'Default Programs (Set User Defaults)', + u'1a9ba3a0-143a-11cf-8350-444553540000': u'Shell Favorite Folder', + u'1d2680c9-0e2a-469d-b787-065558bc7d43': u'Fusion Cache', + u'1f3427c8-5c10-4210-aa03-2ee45287d668': u'User Pinned', + u'1f43a58c-ea28-43e6-9ec4-34574a16ebb7': u'Windows Desktop Search MAPI Namespace Extension Class', + u'1f4de370-d627-11d1-ba4f-00a0c91eedba': u'Search Results - Computers (Computer Search Results Folder, Network Computers)', + u'1fa9085f-25a2-489b-85d4-86326eedcd87': u'Manage Wireless Networks', + u'208d2c60-3aea-1069-a2d7-08002b30309d': u'My Network Places', + u'20d04fe0-3aea-1069-a2d8-08002b30309d': u'My Computer', + u'21ec2020-3aea-1069-a2dd-08002b30309d': u'Control Panel', + u'2227a280-3aea-1069-a2de-08002b30309d': u'Printers and Faxes', + u'241d7c96-f8bf-4f85-b01f-e2b043341a4b': u'Workspaces Center (Remote Application and Desktop Connections)', + u'2559a1f0-21d7-11d4-bdaf-00c04f60b9f0': u'Search', + u'2559a1f1-21d7-11d4-bdaf-00c04f60b9f0': u'Help and Support', + u'2559a1f2-21d7-11d4-bdaf-00c04f60b9f0': u'Windows Security', + u'2559a1f3-21d7-11d4-bdaf-00c04f60b9f0': u'Run...', + u'2559a1f4-21d7-11d4-bdaf-00c04f60b9f0': u'Internet', + u'2559a1f5-21d7-11d4-bdaf-00c04f60b9f0': u'E-mail', + u'2559a1f7-21d7-11d4-bdaf-00c04f60b9f0': u'Set Program Access and Defaults', + u'267cf8a9-f4e3-41e6-95b1-af881be130ff': u'Location Folder', + u'26ee0668-a00a-44d7-9371-beb064c98683': u'Control Panel', + u'2728520d-1ec8-4c68-a551-316b684c4ea7': u'Network Setup Wizard', + u'28803f59-3a75-4058-995f-4ee5503b023c': u'Bluetooth Devices', + u'289978ac-a101-4341-a817-21eba7fd046d': u'Sync Center Conflict Folder', + 
u'289af617-1cc3-42a6-926c-e6a863f0e3ba': u'DLNA Media Servers Data Source', + u'2965e715-eb66-4719-b53f-1672673bbefa': u'Results Folder', + u'2e9e59c0-b437-4981-a647-9c34b9b90891': u'Sync Setup Folder', + u'2f6ce85c-f9ee-43ca-90c7-8a9bd53a2467': u'File History Data Source', + u'3080f90d-d7ad-11d9-bd98-0000947b0257': u'Show Desktop', + u'3080f90e-d7ad-11d9-bd98-0000947b0257': u'Window Switcher', + u'323ca680-c24d-4099-b94d-446dd2d7249e': u'Common Places', + u'328b0346-7eaf-4bbe-a479-7cb88a095f5b': u'Layout Folder', + u'335a31dd-f04b-4d76-a925-d6b47cf360df': u'Backup and Restore Center', + u'35786d3c-b075-49b9-88dd-029876e11c01': u'Portable Devices', + u'36eef7db-88ad-4e81-ad49-0e313f0c35f8': u'Windows Update', + u'3c5c43a3-9ce9-4a9b-9699-2ac0cf6cc4bf': u'Configure Wireless Network', + u'3f6bc534-dfa1-4ab4-ae54-ef25a74e0107': u'System Restore', + u'4026492f-2f69-46b8-b9bf-5654fc07e423': u'Windows Firewall', + u'418c8b64-5463-461d-88e0-75e2afa3c6fa': u'Explorer Browser Results Folder', + u'4234d49b-0245-4df3-b780-3893943456e1': u'Applications', + u'437ff9c0-a07f-4fa0-af80-84b6c6440a16': u'Command Folder', + u'450d8fba-ad25-11d0-98a8-0800361b1103': u'My Documents', + u'48e7caab-b918-4e58-a94d-505519c795dc': u'Start Menu Folder', + u'5399e694-6ce5-4d6c-8fce-1d8870fdcba0': u'Control Panel command object for Start menu and desktop', + u'58e3c745-d971-4081-9034-86e34b30836a': u'Speech Recognition Options', + u'59031a47-3f72-44a7-89c5-5595fe6b30ee': u'Shared Documents Folder (Users Files)', + u'5ea4f148-308c-46d7-98a9-49041b1dd468': u'Mobility Center Control Panel', + u'60632754-c523-4b62-b45c-4172da012619': u'User Accounts', + u'63da6ec0-2e98-11cf-8d82-444553540000': u'Microsoft FTP Folder', + u'640167b4-59b0-47a6-b335-a6b3c0695aea': u'Portable Media Devices', + u'645ff040-5081-101b-9f08-00aa002f954e': u'Recycle Bin', + u'64693913-1c21-4f30-a98f-4e52906d3b56': u'CLSID_AppInstanceFolder', + u'67718415-c450-4f3c-bf8a-b487642dc39b': u'Windows Features', + u'6785bfac-9d2d-4be5-b7e2-59937e8fb80a': u'Other Users Folder', + u'67ca7650-96e6-4fdd-bb43-a8e774f73a57': u'Home Group Control Panel (Home Group)', + u'692f0339-cbaa-47e6-b5b5-3b84db604e87': u'Extensions Manager Folder', + u'6dfd7c5c-2451-11d3-a299-00c04f8ef6af': u'Folder Options', + u'7007acc7-3202-11d1-aad2-00805fc1270e': u'Network Connections (Network and Dial-up Connections)', + u'708e1662-b832-42a8-bbe1-0a77121e3908': u'Tree property value folder', + u'71d99464-3b6b-475c-b241-e15883207529': u'Sync Results Folder', + u'72b36e70-8700-42d6-a7f7-c9ab3323ee51': u'Search Connector Folder', + u'78f3955e-3b90-4184-bd14-5397c15f1efc': u'Performance Information and Tools', + u'7a9d77bd-5403-11d2-8785-2e0420524153': u'User Accounts (Users and Passwords)', + u'7b81be6a-ce2b-4676-a29e-eb907a5126c5': u'Programs and Features', + u'7bd29e00-76c1-11cf-9dd0-00a0c9034933': u'Temporary Internet Files', + u'7bd29e01-76c1-11cf-9dd0-00a0c9034933': u'Temporary Internet Files', + u'7be9d83c-a729-4d97-b5a7-1b7313c39e0a': u'Programs Folder', + u'8060b2e3-c9d7-4a5d-8c6b-ce8eba111328': u'Proximity CPL', + u'8343457c-8703-410f-ba8b-8b026e431743': u'Feedback Tool', + u'85bbd920-42a0-1069-a2e4-08002b30309d': u'Briefcase', + u'863aa9fd-42df-457b-8e4d-0de1b8015c60': u'Remote Printers', + u'865e5e76-ad83-4dca-a109-50dc2113ce9a': u'Programs Folder and Fast Items', + u'871c5380-42a0-1069-a2ea-08002b30309d': u'Internet Explorer (Homepage)', + u'87630419-6216-4ff8-a1f0-143562d16d5c': u'Mobile Broadband Profile Settings Editor', + u'877ca5ac-cb41-4842-9c69-9136e42d47e2': u'File 
Backup Index', + u'88c6c381-2e85-11d0-94de-444553540000': u'ActiveX Cache Folder', + u'896664f7-12e1-490f-8782-c0835afd98fc': u'Libraries delegate folder that appears in Users Files Folder', + u'8e908fc9-becc-40f6-915b-f4ca0e70d03d': u'Network and Sharing Center', + u'8fd8b88d-30e1-4f25-ac2b-553d3d65f0ea': u'DXP', + u'9113a02d-00a3-46b9-bc5f-9c04daddd5d7': u'Enhanced Storage Data Source', + u'93412589-74d4-4e4e-ad0e-e0cb621440fd': u'Font Settings', + u'9343812e-1c37-4a49-a12e-4b2d810d956b': u'Search Home', + u'96437431-5a90-4658-a77c-25478734f03e': u'Server Manager', + u'96ae8d84-a250-4520-95a5-a47a7e3c548b': u'Parental Controls', + u'98d99750-0b8a-4c59-9151-589053683d73': u'Windows Search Service Media Center Namespace Extension Handler', + u'98f275b4-4fff-11e0-89e2-7b86dfd72085': u'CLSID_StartMenuLauncherProviderFolder', + u'992cffa0-f557-101a-88ec-00dd010ccc48': u'Network Connections (Network and Dial-up Connections)', + u'9a096bb5-9dc3-4d1c-8526-c3cbf991ea4e': u'Internet Explorer RSS Feeds Folder', + u'9c60de1e-e5fc-40f4-a487-460851a8d915': u'AutoPlay', + u'9c73f5e5-7ae7-4e32-a8e8-8d23b85255bf': u'Sync Center Folder', + u'9db7a13c-f208-4981-8353-73cc61ae2783': u'Previous Versions', + u'9f433b7c-5f96-4ce1-ac28-aeaa1cc04d7c': u'Security Center', + u'9fe63afd-59cf-4419-9775-abcc3849f861': u'System Recovery (Recovery)', + u'a00ee528-ebd9-48b8-944a-8942113d46ac': u'CLSID_StartMenuCommandingProviderFolder', + u'a3c3d402-e56c-4033-95f7-4885e80b0111': u'Previous Versions Results Delegate Folder', + u'a5a3563a-5755-4a6f-854e-afa3230b199f': u'Library Folder', + u'a5e46e3a-8849-11d1-9d8c-00c04fc99d61': u'Microsoft Browser Architecture', + u'a6482830-08eb-41e2-84c1-73920c2badb9': u'Removable Storage Devices', + u'a8a91a66-3a7d-4424-8d24-04e180695c7a': u'Device Center (Devices and Printers)', + u'aee2420f-d50e-405c-8784-363c582bf45a': u'Device Pairing Folder', + u'afdb1f70-2a4c-11d2-9039-00c04f8eeb3e': u'Offline Files Folder', + u'b155bdf8-02f0-451e-9a26-ae317cfd7779': u'Delegate folder that appears in Computer', + u'b2952b16-0e07-4e5a-b993-58c52cb94cae': u'DB Folder', + u'b4fb3f98-c1ea-428d-a78a-d1f5659cba93': u'Other Users Folder', + u'b98a2bea-7d42-4558-8bd1-832f41bac6fd': u'Backup And Restore (Backup and Restore Center)', + u'bb06c0e4-d293-4f75-8a90-cb05b6477eee': u'System', + u'bb64f8a7-bee7-4e1a-ab8d-7d8273f7fdb6': u'Action Center Control Panel', + u'bc476f4c-d9d7-4100-8d4e-e043f6dec409': u'Microsoft Browser Architecture', + u'bc48b32f-5910-47f5-8570-5074a8a5636a': u'Sync Results Delegate Folder', + u'bd84b380-8ca2-1069-ab1d-08000948f534': u'Microsoft Windows Font Folder', + u'bdeadf00-c265-11d0-bced-00a0c90ab50f': u'Web Folders', + u'be122a0e-4503-11da-8bde-f66bad1e3f3a': u'Windows Anytime Upgrade', + u'bf782cc9-5a52-4a17-806c-2a894ffeeac5': u'Language Settings', + u'c291a080-b400-4e34-ae3f-3d2b9637d56c': u'UNCFATShellFolder Class', + u'c2b136e2-d50e-405c-8784-363c582bf43e': u'Device Center Initialization', + u'c555438b-3c23-4769-a71f-b6d3d9b6053a': u'Display', + u'c57a6066-66a3-4d91-9eb9-41532179f0a5': u'Application Suggested Locations', + u'c58c4893-3be0-4b45-abb5-a63e4b8c8651': u'Troubleshooting', + u'cb1b7f8c-c50a-4176-b604-9e24dee8d4d1': u'Welcome Center (Getting Started)', + u'd2035edf-75cb-4ef1-95a7-410d9ee17170': u'DLNA Content Directory Data Source', + u'd20ea4e1-3957-11d2-a40b-0c5020524152': u'Fonts', + u'd20ea4e1-3957-11d2-a40b-0c5020524153': u'Administrative Tools', + u'd34a6ca6-62c2-4c34-8a7c-14709c1ad938': u'Common Places FS Folder', + u'd426cfd0-87fc-4906-98d9-a23f5d515d61': 
u'Windows Search Service Outlook Express Protocol Handler', + u'd4480a50-ba28-11d1-8e75-00c04fa31a86': u'Add Network Place', + u'd450a8a1-9568-45c7-9c0e-b4f9fb4537bd': u'Installed Updates', + u'd555645e-d4f8-4c29-a827-d93c859c4f2a': u'Ease of Access (Ease of Access Center)', + u'd5b1944e-db4e-482e-b3f1-db05827f0978': u'Softex OmniPass Encrypted Folder', + u'd6277990-4c6a-11cf-8d87-00aa0060f5bf': u'Scheduled Tasks', + u'd8559eb9-20c0-410e-beda-7ed416aecc2a': u'Windows Defender', + u'd9ef8727-cac2-4e60-809e-86f80a666c91': u'Secure Startup (BitLocker Drive Encryption)', + u'daf95313-e44d-46af-be1b-cbacea2c3065': u'CLSID_StartMenuProviderFolder', + u'dffacdc5-679f-4156-8947-c5c76bc0b67f': u'Delegate folder that appears in Users Files Folder', + u'e17d4fc0-5564-11d1-83f2-00a0c90dc849': u'Search Results Folder', + u'e211b736-43fd-11d1-9efb-0000f8757fcd': u'Scanners and Cameras', + u'e345f35f-9397-435c-8f95-4e922c26259e': u'CLSID_StartMenuPathCompleteProviderFolder', + u'e413d040-6788-4c22-957e-175d1c513a34': u'Sync Center Conflict Delegate Folder', + u'e773f1af-3a65-4866-857d-846fc9c4598a': u'Shell Storage Folder Viewer', + u'e7de9b1a-7533-4556-9484-b26fb486475e': u'Network Map', + u'e7e4bc40-e76a-11ce-a9bb-00aa004ae837': u'Shell DocObject Viewer', + u'e88dcce0-b7b3-11d1-a9f0-00aa0060fa31': u'Compressed Folder', + u'e95a4861-d57a-4be1-ad0f-35267e261739': u'Windows SideShow', + u'e9950154-c418-419e-a90a-20c5287ae24b': u'Sensors (Location and Other Sensors)', + u'ed50fc29-b964-48a9-afb3-15ebb9b97f36': u'PrintHood delegate folder', + u'ed7ba470-8e54-465e-825c-99712043e01c': u'All Tasks', + u'ed834ed6-4b5a-4bfe-8f11-a626dcb6a921': u'Personalization Control Panel', + u'edc978d6-4d53-4b2f-a265-5805674be568': u'Stream Backed Folder', + u'f02c1a0d-be21-4350-88b0-7367fc96ef3c': u'Computers and Devices', + u'f1390a9a-a3f4-4e5d-9c5f-98f3bd8d935c': u'Sync Setup Delegate Folder', + u'f3f5824c-ad58-4728-af59-a1ebe3392799': u'Sticky Notes Namespace Extension for Windows Desktop Search', + u'f5175861-2688-11d0-9c5e-00aa00a45957': u'Subscription Folder', + u'f6b6e965-e9b2-444b-9286-10c9152edbc5': u'History Vault', + u'f8c2ab3b-17bc-41da-9758-339d7dbf2d88': u'Previous Versions Results Folder', + u'f90c627b-7280-45db-bc26-cce7bdd620a4': u'All Tasks', + u'f942c606-0914-47ab-be56-1321b8035096': u'Storage Spaces', + u'fb0c9c8a-6c50-11d1-9f1d-0000f8757fcd': u'Scanners & Cameras', + u'fbf23b42-e3f0-101b-8488-00aa003e56f8': u'Internet Explorer', + u'fe1290f0-cfbd-11cf-a330-00aa00c16e65': u'Directory', + u'ff393560-c2a7-11cf-bff4-444553540000': u'History', +} diff --git a/plaso/winreg/__init__.py b/plaso/winreg/__init__.py new file mode 100644 index 0000000..0c8696c --- /dev/null +++ b/plaso/winreg/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
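The shell folder table above works the same way: a lowercase GUID without braces maps to a display name. A hedged sketch of a resolver that falls back to the raw GUID so unknown folders remain identifiable; GetShellFolderName is a hypothetical helper written for illustration, not part of plaso:

from plaso.winnt import shell_folder_ids


def GetShellFolderName(guid_string):
  """Returns a readable name for a shell folder GUID, or the GUID itself.

  Args:
    guid_string: A lowercase GUID string without braces, e.g.
                 u'20d04fe0-3aea-1069-a2d8-08002b30309d' for My Computer.
  """
  # Falling back to the raw GUID keeps unknown folders visible in output.
  return shell_folder_ids.DESCRIPTIONS.get(guid_string, guid_string)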
diff --git a/plaso/winreg/cache.py b/plaso/winreg/cache.py
new file mode 100644
index 0000000..dc78e31
--- /dev/null
+++ b/plaso/winreg/cache.py
@@ -0,0 +1,142 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Interface and plugins for caching of Windows Registry objects."""
+
+import abc
+
+from plaso.lib import errors
+from plaso.lib import registry
+
+
+class WinRegistryCache(object):
+  """Class that implements the Windows Registry objects cache.
+
+  There are some values that are valid for the duration of an entire run
+  against an image, such as code_page, etc.
+
+  However, there are other values that should only be valid for each
+  Windows Registry file, such as a current_control_set. The Windows Registry
+  objects cache is designed to store those short-lived cache values, so they
+  can be calculated once for each Windows Registry file, yet do not live
+  across all files parsed within an image.
+  """
+
+  def __init__(self):
+    """Initialize the cache object."""
+    super(WinRegistryCache, self).__init__()
+    self.attributes = {}
+
+  def BuildCache(self, hive, reg_type):
+    """Builds up the cache.
+
+    Args:
+      hive: The WinRegistry object.
+      reg_type: The Registry type, e.g. "SYSTEM", "NTUSER".
+    """
+    for _, cl in WinRegCachePlugin.classes.items():
+      try:
+        plugin = cl(reg_type)
+        value = plugin.Process(hive)
+        if value:
+          self.attributes[plugin.ATTRIBUTE] = value
+      except errors.WrongPlugin:
+        pass
+
+
+class WinRegCachePlugin(object):
+  """Class that implements the Windows Registry cache plugin interface."""
+
+  __metaclass__ = registry.MetaclassRegistry
+  __abstract = True
+
+  # Define the needed attributes.
+  ATTRIBUTE = ''
+
+  REG_TYPE = ''
+  REG_KEY = ''
+
+  def __init__(self, reg_type):
+    """Initialize the plugin.
+
+    Args:
+      reg_type: The detected Windows Registry type. This value should match
+                the REG_TYPE value defined by the plugins.
+    """
+    super(WinRegCachePlugin, self).__init__()
+    if self.REG_TYPE.lower() != reg_type.lower():
+      raise errors.WrongPlugin(u'Not the correct Windows Registry type.')
+
+  def Process(self, hive):
+    """Extract the correct key and get the value.
+
+    Args:
+      hive: The Windows Registry hive object (instance of WinRegistry).
+ """ + if not self.REG_KEY: + return + + key = hive.GetKeyByPath(self.REG_KEY) + + if not key: + return + + return self.GetValue(key) + + @abc.abstractmethod + def GetValue(self, key): + """Extract the attribute from the provided key.""" + + +class CurrentControl(WinRegCachePlugin): + """Fetch information about the current control set.""" + + ATTRIBUTE = 'current_control_set' + + REG_TYPE = 'SYSTEM' + REG_KEY = '\\Select' + + def GetValue(self, key): + """Extract current control set information.""" + value = key.GetValue('Current') + + if not value and not value.DataIsInteger(): + return None + + key_number = value.data + + # If the value is Zero then we need to check + # other keys. + # The default behavior is: + # 1. Use the "Current" value. + # 2. Use the "Default" value. + # 3. Use the "LastKnownGood" value. + if key_number == 0: + default_value = key.GetValue('Default') + lastgood_value = key.GetValue('LastKnownGood') + + if default_value and default_value.DataIsInteger(): + key_number = default_value.data + + if not key_number: + if lastgood_value and lastgood_value.DataIsInteger(): + key_number = lastgood_value.data + + if key_number <= 0 or key_number > 999: + return None + + return u'ControlSet{0:03d}'.format(key_number) diff --git a/plaso/winreg/cache_test.py b/plaso/winreg/cache_test.py new file mode 100644 index 0000000..37086ee --- /dev/null +++ b/plaso/winreg/cache_test.py @@ -0,0 +1,49 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the Windows Registry objects cache.""" + +import unittest + +from plaso.winreg import cache +from plaso.winreg import test_lib +from plaso.winreg import winregistry + + +class CacheTest(test_lib.WinRegTestCase): + """Tests for the Windows Registry objects cache.""" + + def testBuildCache(self): + """Tests creating a Windows Registry objects cache.""" + registry = winregistry.WinRegistry( + winregistry.WinRegistry.BACKEND_PYREGF) + + test_file = self._GetTestFilePath(['SYSTEM']) + file_entry = self._GetTestFileEntry(test_file) + winreg_file = registry.OpenFile(file_entry, codepage='cp1252') + + winreg_cache = cache.WinRegistryCache() + + # Test if this function does not raise an exception. + winreg_cache.BuildCache(winreg_file, 'SYSTEM') + + self.assertEqual( + winreg_cache.attributes['current_control_set'], 'ControlSet001') + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/winreg/interface.py b/plaso/winreg/interface.py new file mode 100644 index 0000000..86526e2 --- /dev/null +++ b/plaso/winreg/interface.py @@ -0,0 +1,227 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The interface for Windows Registry related objects."""
+
+import abc
+
+
+class WinRegKey(object):
+  """Abstract class to represent the Windows Registry key interface."""
+
+  PATH_SEPARATOR = u'\\'
+
+  @abc.abstractproperty
+  def path(self):
+    """The path of the key."""
+
+  @abc.abstractproperty
+  def name(self):
+    """The name of the key."""
+
+  @abc.abstractproperty
+  def offset(self):
+    """The offset of the key within the Windows Registry file."""
+
+  @abc.abstractproperty
+  def last_written_timestamp(self):
+    """The last written time of the key represented as a timestamp."""
+
+  @abc.abstractproperty
+  def number_of_values(self):
+    """The number of values within the key."""
+
+  @abc.abstractmethod
+  def GetValue(self, name):
+    """Retrieves a value by name.
+
+    Args:
+      name: Name of the value or an empty string for the default value.
+
+    Returns:
+      An instance of a Windows Registry value object (WinRegValue) if
+      a corresponding value was found or None if not.
+    """
+
+  @abc.abstractmethod
+  def GetValues(self):
+    """Retrieves all values within the key.
+
+    Yields:
+      Windows Registry value objects (instances of WinRegValue) that represent
+      the values stored within the key.
+    """
+
+  @abc.abstractproperty
+  def number_of_subkeys(self):
+    """The number of subkeys within the key."""
+
+  @abc.abstractmethod
+  def GetSubkey(self, name):
+    """Retrieves a subkey by name.
+
+    Args:
+      name: The relative path of the current key to the desired one.
+
+    Returns:
+      The subkey with the relative path of name or None if not found.
+    """
+
+  @abc.abstractmethod
+  def GetSubkeys(self):
+    """Retrieves all subkeys within the key.
+
+    Yields:
+      Windows Registry key objects (instances of WinRegKey) that represent
+      the subkeys stored within the key.
+ """ + + +class WinRegValue(object): + """Abstract class to represent the Windows Registry value interface.""" + + REG_NONE = 0 + REG_SZ = 1 + REG_EXPAND_SZ = 2 + REG_BINARY = 3 + REG_DWORD = 4 + REG_DWORD_LITTLE_ENDIAN = 4 + REG_DWORD_BIG_ENDIAN = 5 + REG_LINK = 6 + REG_MULTI_SZ = 7 + REG_RESOURCE_LIST = 8 + REG_FULL_RESOURCE_DESCRIPTOR = 9 + REG_RESOURCE_REQUIREMENT_LIST = 10 + REG_QWORD = 11 + + _DATA_TYPE_STRINGS = { + 0: u'REG_NONE', + 1: u'REG_SZ', + 2: u'REG_EXPAND_SZ', + 3: u'REG_BINARY', + 4: u'REG_DWORD_LE', + 5: u'REG_DWORD_BE', + 6: u'REG_LINK', + 7: u'REG_MULTI_SZ', + 8: u'REG_RESOURCE_LIST', + 9: u'REG_FULL_RESOURCE_DESCRIPTOR', + 10: u'REG_RESOURCE_REQUIREMENT_LIST', + 11: u'REG_QWORD' + } + + def __init__(self): + """Default constructor for the Windows Registry value.""" + self._data = u'' + + @abc.abstractproperty + def name(self): + """The name of the value.""" + + @abc.abstractproperty + def offset(self): + """The offset of the value within the Windows Registry file.""" + + @abc.abstractproperty + def data_type(self): + """Numeric value that contains the data type.""" + + @property + def data_type_string(self): + """String representation of the data type.""" + return self._DATA_TYPE_STRINGS.get(self.data_type, u'UNKNOWN') + + @abc.abstractproperty + def raw_data(self): + """The value data as a byte string.""" + + @abc.abstractproperty + def data(self): + """The value data as a native Python object.""" + + def DataIsInteger(self): + """Determines, based on the data type, if the data is an integer. + + The data types considered strings are: REG_DWORD (REG_DWORD_LITTLE_ENDIAN), + REG_DWORD_BIG_ENDIAN and REG_QWORD. + + Returns: + True if the data is an integer, false otherwise. + """ + return self.data_type in [ + self.REG_DWORD, self.REG_DWORD_BIG_ENDIAN, self.REG_QWORD] + + def DataIsString(self): + """Determines, based on the data type, if the data is a string. + + The data types considered strings are: REG_SZ and REG_EXPAND_SZ. + + Returns: + True if the data is a string, false otherwise. + """ + return self.data_type in [self.REG_SZ, self.REG_EXPAND_SZ] + + def DataIsMultiString(self): + """Determines, based on the data type, if the data is a multi string. + + The data types considered multi strings are: REG_MULTI_SZ. + + Returns: + True if the data is a multi string, false otherwise. + """ + return self.data_type == self.REG_MULTI_SZ + + def DataIsBinaryData(self): + """Determines, based on the data type, if the data is binary data. + + The data types considered binary data are: REG_BINARY. + + Returns: + True if the data is a multi string, false otherwise. + """ + return self.data_type == self.REG_BINARY + + +class WinRegFile(object): + """Abstract class to represent the Windows Registry file interface.""" + + def __init__(self): + """Default constructor for the Windows Registry file.""" + self._mounted_key_path = u'' + + @abc.abstractmethod + def Open(self, file_object, codepage='cp1252'): + """Opens the Windows Registry file. + + Args: + file_object: The file-like object of the Windows Registry file. + codepage: Optional codepage for ASCII strings, default is cp1252. + """ + + @abc.abstractmethod + def Close(self): + """Closes the Windows Registry file.""" + + @abc.abstractmethod + def GetKeyByPath(self, registry_path): + """Retrieves a specific key defined by the Registry path. + + Args: + path: the Registry path. + + Returns: + The key (instance of WinRegKey) if available or None otherwise. 
+ """ diff --git a/plaso/winreg/path_expander.py b/plaso/winreg/path_expander.py new file mode 100644 index 0000000..d8bdcaf --- /dev/null +++ b/plaso/winreg/path_expander.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The Windows Registry key path expander.""" + + +class WinRegistryKeyPathExpander(object): + """Class that implements the Windows Registry key path expander object.""" + + def __init__(self, reg_cache=None): + """Initialize the path expander object. + + Args: + reg_cache: Optional Registry objects cache (insance of WinRegistryCache). + """ + super(WinRegistryKeyPathExpander, self).__init__() + self._reg_cache = reg_cache + + def ExpandPath(self, key_path, pre_obj=None): + """Expand a Registry key path based on attributes in pre calculated values. + + A Registry key path may contain paths that are attributes, based on + calculations from either preprocessing or based on each individual + Windows Registry file. + + An attribute is defined as anything within a curly bracket, eg. + "\\System\\{my_attribute}\\Path\\Keyname". If the attribute my_attribute + is defined in either the preprocessing object or the Registry objects + cache it's value will be replaced with the attribute name, e.g. + "\\System\\MyValue\\Path\\Keyname". + + If the Registry path needs to have curly brackets in the path then + they need to be escaped with another curly bracket, eg + "\\System\\{my_attribute}\\{{123-AF25-E523}}\\KeyName". In this + case the {{123-AF25-E523}} will be replaced with "{123-AF25-E523}". + + Args: + key_path: The Registry key path before being expanded. + pre_obj: Optional preprocess object that contains stored values from + the image. + + Returns: + A Registry key path that's expanded based on attribute values. + + Raises: + KeyError: If an attribute name is in the key path yet not set in + either the Registry objects cache nor in the preprocessing + object a KeyError will be raised. + """ + expanded_key_path = u'' + key_dict = {} + if self._reg_cache: + key_dict.update(self._reg_cache.attributes.items()) + + if pre_obj: + key_dict.update(pre_obj.__dict__.items()) + + try: + expanded_key_path = key_path.format(**key_dict) + except KeyError as exception: + raise KeyError(u'Unable to expand path with error: {0:s}'.format( + exception)) + + if not expanded_key_path: + raise KeyError(u'Unable to expand path, no value returned.') + + return expanded_key_path diff --git a/plaso/winreg/test_lib.py b/plaso/winreg/test_lib.py new file mode 100644 index 0000000..db92d3c --- /dev/null +++ b/plaso/winreg/test_lib.py @@ -0,0 +1,220 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Windows Registry related functions and classes for testing."""
+
+import os
+import unittest
+
+import construct
+
+from dfvfs.lib import definitions
+from dfvfs.path import factory as path_spec_factory
+from dfvfs.resolver import resolver as path_spec_resolver
+
+from plaso.winreg import interface
+
+
+class TestRegKey(interface.WinRegKey):
+  """Implementation of the Registry key interface for testing."""
+
+  def __init__(self, path, last_written_timestamp, values, offset=0,
+               subkeys=None):
+    """Initializes a Windows Registry key object for testing.
+
+    This implementation is a manual one, so it can be used for testing
+    the Registry plugins without requiring a full blown Windows Registry
+    file to extract key values from.
+
+    Args:
+      path: The full key name and path.
+      last_written_timestamp: An integer containing the last written
+                              timestamp of the Registry key.
+      values: A list of TestRegValue values this key holds.
+      offset: A byte offset into the Windows Registry file where the
+              entry lies.
+      subkeys: A list of subkeys this key has.
+    """
+    super(TestRegKey, self).__init__()
+    self._name = None
+    self._path = path
+    self._last_written_timestamp = last_written_timestamp
+    self._values = values
+    self._offset = offset
+    if subkeys is None:
+      self._subkeys = []
+    else:
+      self._subkeys = subkeys
+
+  @property
+  def path(self):
+    """The path of the key."""
+    return self._path
+
+  @property
+  def name(self):
+    """The name of the key."""
+    if not self._name and self._path:
+      self._name = self._path.split(self.PATH_SEPARATOR)[-1]
+    return self._name
+
+  @property
+  def offset(self):
+    """The offset of the key within the Windows Registry file."""
+    return self._offset
+
+  @property
+  def last_written_timestamp(self):
+    """The last written time of the key represented as a timestamp."""
+    return self._last_written_timestamp
+
+  @property
+  def number_of_values(self):
+    """The number of values within the key."""
+    return len(self._values)
+
+  def GetValue(self, name):
+    """Return a WinRegValue object for a specific Registry value name."""
+    for value in self._values:
+      if value.name == name:
+        return value
+
+  def GetValues(self):
+    """Return a list of all values from the Registry key."""
+    return self._values
+
+  @property
+  def number_of_subkeys(self):
+    """The number of subkeys within the key."""
+    return len(self._subkeys)
+
+  def GetSubkey(self, name):
+    """Retrieves a subkey by name.
+
+    Args:
+      name: The relative path of the current key to the desired one.
+
+    Returns:
+      The subkey with the relative path of name or None if not found.
+ """ + for subkey in self._subkeys: + if subkey.name == name: + return subkey + return + + def GetSubkeys(self): + """Return a list of all subkeys.""" + return self._subkeys + + +class TestRegValue(interface.WinRegValue): + """Implementation of the Registry value interface for testing.""" + + _INT32_BIG_ENDIAN = construct.SBInt32('value') + _INT32_LITTLE_ENDIAN = construct.SLInt32('value') + _INT64_LITTLE_ENDIAN = construct.SLInt64('value') + + def __init__(self, name, data, data_type, offset=0): + """Set up the test reg value object.""" + super(TestRegValue, self).__init__() + self._name = name + self._data = data + self._data_type = data_type + self._offset = offset + self._type_str = '' + + @property + def name(self): + """The name of the value.""" + return self._name + + @property + def offset(self): + """The offset of the value within the Windows Registry file.""" + return self._offset + + @property + def data_type(self): + """Numeric value that contains the data type.""" + return self._data_type + + @property + def raw_data(self): + """The value data as a byte string.""" + return self._data + + @property + def data(self): + """The value data as a native Python object.""" + if not self._data: + return None + + if self._data_type in [self.REG_SZ, self.REG_EXPAND_SZ, self.REG_LINK]: + try: + return unicode(self._data.decode('utf-16-le')) + except UnicodeError: + pass + + elif self._data_type == self.REG_DWORD and len(self._data) == 4: + return self._INT32_LITTLE_ENDIAN.parse(self._data) + + elif self._data_type == self.REG_DWORD_BIG_ENDIAN and len(self._data) == 4: + return self._INT32_BIG_ENDIAN.parse(self._data) + + elif self._data_type == self.REG_QWORD and len(self._data) == 8: + return self._INT64_LITTLE_ENDIAN.parse(self._data) + + elif self._data_type == self.REG_MULTI_SZ: + try: + utf16_string = unicode(self._data.decode('utf-16-le')) + return filter(None, utf16_string.split('\x00')) + except UnicodeError: + pass + + return self._data + + +class WinRegTestCase(unittest.TestCase): + """The unit test case for winreg.""" + + _TEST_DATA_PATH = os.path.join(os.getcwd(), 'test_data') + + # Show full diff results, part of TestCase so does not follow our naming + # conventions. + maxDiff = None + + def _GetTestFilePath(self, path_segments): + """Retrieves the path of a test file relative to the test data directory. + + Args: + path_segments: the path segments inside the test data directory. + + Returns: + A path of the test file. + """ + # Note that we need to pass the individual path segments to os.path.join + # and not a list. + return os.path.join(self._TEST_DATA_PATH, *path_segments) + + def _GetTestFileEntry(self, path): + """Retrieves the test file entry. + + Args: + path: the path of the test file. + + Returns: + The test file entry (instance of dfvfs.FileEntry). + """ + path_spec = path_spec_factory.Factory.NewPathSpec( + definitions.TYPE_INDICATOR_OS, location=path) + return path_spec_resolver.Resolver.OpenFileEntry(path_spec) diff --git a/plaso/winreg/utils.py b/plaso/winreg/utils.py new file mode 100644 index 0000000..3a7e0f7 --- /dev/null +++ b/plaso/winreg/utils.py @@ -0,0 +1,44 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains Windows Registry utility functions."""
+
+from plaso.winreg import interface
+
+
+def WinRegBasename(path):
+  """Determines the basename for a Windows Registry path.
+
+  Trailing key separators are ignored.
+
+  Args:
+    path: a Windows Registry path with \\ as the key separator.
+
+  Returns:
+    The basename (or last path segment).
+  """
+  # Strip trailing key separators.
+  while path and path[-1] == interface.WinRegKey.PATH_SEPARATOR:
+    path = path[:-1]
+  if path:
+    _, _, path = path.rpartition(interface.WinRegKey.PATH_SEPARATOR)
+  return path
+
+# TODO: create a function to return the values as a dict.
+# This function should replace the repeated code blocks in multiple plugins.
+
+# TODO: create a function to extract string data from a registry value.
diff --git a/plaso/winreg/winpyregf.py b/plaso/winreg/winpyregf.py
new file mode 100644
index 0000000..a551591
--- /dev/null
+++ b/plaso/winreg/winpyregf.py
@@ -0,0 +1,384 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Pyregf specific implementation for the Windows Registry file access."""
+
+import logging
+
+from plaso.lib import errors
+from plaso.lib import timelib
+from plaso.winreg import interface
+
+import pyregf
+
+
+if pyregf.get_version() < '20130716':
+  raise ImportWarning('WinPyregf requires at least pyregf 20130716.')
+
+
+class WinPyregfKey(interface.WinRegKey):
+  """Implementation of a Windows Registry key using pyregf."""
+
+  def __init__(self, pyregf_key, parent_path=u'', root=False):
+    """Initializes a Windows Registry key object.
+
+    Args:
+      pyregf_key: An instance of a pyregf.key object.
+      parent_path: The path of the parent key.
+      root: A boolean indicating whether this is a root key.
+    """
+    super(WinPyregfKey, self).__init__()
+    self._pyregf_key = pyregf_key
+    # Add a few checks to make sure the root key is not considered
+    # invalid by plugin checks (the path of a root key is equal to
+    # the path separator).
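+    # Without these checks a root key passed in with the path separator
+    # as parent path would result in a key path that starts with a double
+    # separator.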
+    if parent_path == self.PATH_SEPARATOR:
+      parent_path = u''
+    if root:
+      self._path = self.PATH_SEPARATOR
+    else:
+      self._path = self.PATH_SEPARATOR.join(
+          [parent_path, self._pyregf_key.name])
+
+  # pylint: disable=method-hidden
+  @property
+  def path(self):
+    """The path of the key."""
+    return self._path
+
+  # pylint: disable=function-redefined,arguments-differ,method-hidden
+  @path.setter
+  def path(self, value):
+    """Set the value of the path explicitly."""
+    self._path = value
+
+  @property
+  def name(self):
+    """The name of the key."""
+    return self._pyregf_key.name
+
+  @property
+  def offset(self):
+    """The offset of the key within the Windows Registry file."""
+    return self._pyregf_key.offset
+
+  @property
+  def last_written_timestamp(self):
+    """The last written time of the key represented as a timestamp."""
+    return timelib.Timestamp.FromFiletime(
+        self._pyregf_key.get_last_written_time_as_integer())
+
+  @property
+  def number_of_values(self):
+    """The number of values within the key."""
+    return self._pyregf_key.number_of_values
+
+  def GetValue(self, name):
+    """Retrieves a value by name.
+
+    Args:
+      name: Name of the value or an empty string for the default value.
+
+    Returns:
+      A Windows Registry value object (instance of WinRegValue) if
+      a corresponding value was found or None if not.
+    """
+    # Value names are not unique and pyregf provides the first match for
+    # the value. If this becomes problematic this method needs to
+    # be changed into a generator, iterating through all returned values
+    # for a given name.
+    pyregf_value = self._pyregf_key.get_value_by_name(name)
+    if pyregf_value:
+      return WinPyregfValue(pyregf_value)
+    return None
+
+  @property
+  def number_of_subkeys(self):
+    """The number of subkeys within the key."""
+    return self._pyregf_key.number_of_sub_keys
+
+  def GetValues(self):
+    """Retrieves all values within the key.
+
+    Yields:
+      Windows Registry value objects (instances of WinRegValue) that represent
+      the values stored within the key.
+    """
+    for pyregf_value in self._pyregf_key.values:
+      yield WinPyregfValue(pyregf_value)
+
+  def GetSubkey(self, name):
+    """Retrieves a subkey by name.
+
+    Args:
+      name: The relative path of the current key to the desired one.
+
+    Returns:
+      The subkey with the relative path of name or None if not found.
+    """
+    subkey = self._pyregf_key.get_sub_key_by_name(name)
+
+    if subkey:
+      return WinPyregfKey(subkey, self.path)
+
+    path_subkey = self._pyregf_key.get_sub_key_by_path(name)
+    if path_subkey:
+      path, _, _ = name.rpartition('\\')
+      path = u'\\'.join([self.path, path])
+      return WinPyregfKey(path_subkey, path)
+
+  def GetSubkeys(self):
+    """Retrieves all subkeys within the key.
+
+    Yields:
+      Windows Registry key objects (instances of WinRegKey) that represent
+      the subkeys stored within the key.
+    """
+    for pyregf_key in self._pyregf_key.sub_keys:
+      yield WinPyregfKey(pyregf_key, self.path)
+
+
+class WinPyregfValue(interface.WinRegValue):
+  """Implementation of a Windows Registry value using pyregf."""
+
+  def __init__(self, pyregf_value):
+    """Initializes a Windows Registry value object.
+
+    Args:
+      pyregf_value: An instance of a pyregf.value object.
+ """ + super(WinPyregfValue, self).__init__() + self._pyregf_value = pyregf_value + self._type_str = '' + + @property + def name(self): + """The name of the value.""" + return self._pyregf_value.name + + @property + def offset(self): + """The offset of the value within the Windows Registry file.""" + return self._pyregf_value.offset + + @property + def data_type(self): + """Numeric value that contains the data type.""" + return self._pyregf_value.type + + @property + def raw_data(self): + """The value data as a byte string.""" + try: + return self._pyregf_value.data + except IOError: + raise errors.WinRegistryValueError( + 'Unable to read data from value: {0:s}'.format( + self._pyregf_value.name)) + + @property + def data(self): + """The value data as a native Python object.""" + if self._pyregf_value.type in [ + self.REG_SZ, self.REG_EXPAND_SZ, self.REG_LINK]: + try: + return self._pyregf_value.data_as_string + except IOError: + pass + + elif self._pyregf_value.type in [ + self.REG_DWORD, self.REG_DWORD_BIG_ENDIAN, self.REG_QWORD]: + try: + return self._pyregf_value.data_as_integer + except (IOError, OverflowError): + # TODO: Rethink this approach. The value is not -1, but we cannot + # return the raw data, since the calling plugin expects an integer + # here. + return -1 + + # TODO: Add support for REG_MULTI_SZ to pyregf. + elif self._pyregf_value.type == self.REG_MULTI_SZ: + if self._pyregf_value.data is None: + return u'' + + try: + utf16_string = unicode(self._pyregf_value.data.decode('utf-16-le')) + return filter(None, utf16_string.split('\x00')) + except UnicodeError: + pass + + return self._pyregf_value.data + + +class WinPyregfFile(interface.WinRegFile): + """Implementation of a Windows Registry file pyregf.""" + + def __init__(self): + """Initializes a Windows Registry key object.""" + super(WinPyregfFile, self).__init__() + self._pyregf_file = pyregf.file() + self.name = '' + self._base_key = None + + def Open(self, file_entry, codepage='cp1252'): + """Opens the Windows Registry file. + + Args: + file_entry: The file entry object. + name: The name of the file. + codepage: Optional codepage for ASCII strings, default is cp1252. + """ + # TODO: Add a more elegant error handling to this issue. There are some + # code pages that are not supported by the parent library. However we + # need to properly set the codepage so the library can properly interpret + # values in the Registry. + try: + self._pyregf_file.set_ascii_codepage(codepage) + + except (TypeError, IOError): + logging.error(( + u'Unable to set the Windows Registry file codepage: {0:s}. ' + u'Ignoring provided value.').format(codepage)) + + self._file_object = file_entry.GetFileObject() + self._pyregf_file.open_file_object(self._file_object) + + self._base_key = self._pyregf_file.get_root_key() + + # TODO: move to a pyvfs like Registry sub-system. + self.name = file_entry.name + + def Close(self): + """Closes the Windows Registry file.""" + self._pyregf_file.close() + self._file_object.close() + + def GetKeyByPath(self, path): + """Retrieves a specific key defined by the Registry path. + + Args: + path: the Registry path. + + Returns: + The key (instance of WinRegKey) if available or None otherwise. 
+ """ + if not path: + return None + + if not self._base_key: + return None + + pyregf_key = self._base_key.get_sub_key_by_path(path) + + if not pyregf_key: + return None + + if pyregf_key.name == self._base_key.name: + root = True + else: + root = False + + parent_path, _, _ = path.rpartition(interface.WinRegKey.PATH_SEPARATOR) + return WinPyregfKey(pyregf_key, parent_path, root) + + +class WinRegistry(object): + """Provides access to the Windows Registry file.""" + # TODO: deprecate this class. + + def __init__(self, file_entry, codepage='cp1252'): + """Constructor for the Registry object. + + Args: + file_entry: A file entry object. + codepage: The codepage of the Registry hive, used for string + representation. + """ + self._pyregf_file = pyregf.file() + + try: + # TODO: Add a more elegant error handling to this issue. There are some + # code pages that are not supported by the parent library. However we + # need to properly set the codepage so the library can properly interpret + # values in the Registry. + self._pyregf_file.set_ascii_codepage(codepage) + except (TypeError, IOError): + logging.error( + u'Unable to set the Registry codepage to: {}. Not setting it'.format( + codepage)) + + file_object = file_entry.GetFileObject() + self._pyregf_file.open_file_object(file_object) + + def GetRoot(self): + """Return the root key of the Registry hive.""" + key = WinPyregfKey(self._pyregf_file.get_root_key()) + # Change root key name to avoid key based plugins failing. + key.path = '' + return key + + def GetKey(self, key): + """Return a Registry key as a WinPyregfKey object.""" + if not key: + return None + + my_key = self._pyregf_file.get_key_by_path(key) + if not my_key: + return None + + path, _, _ = key.rpartition('\\') + + return WinPyregfKey(my_key, path) + + def __contains__(self, key): + """Check if a certain Registry key exists within the hive.""" + try: + return bool(self.GetKey(key)) + except KeyError: + return False + + def GetAllSubkeys(self, key): + """Generator that returns all sub keys of any given Registry key. + + Args: + key: A Windows Registry key string or object (instance of WinPyregfKey). + + Yields: + Windows Registry key objects (instances of WinPyregfKey) that represent + the subkeys stored within the key. + """ + # TODO: refactor this function. + # TODO: remove the hasattr check. + if not hasattr(key, 'GetSubkeys'): + key = self.GetKey(key) + + for subkey in key.GetSubkeys(): + yield subkey + if subkey.number_of_subkeys != 0: + for s in self.GetAllSubkeys(subkey): + yield s + + def __iter__(self): + """Default iterator, returns all subkeys of the Windows Registry file.""" + root = self.GetRoot() + for key in self.GetAllSubkeys(root): + yield key + + +def GetLibraryVersion(): + """Return the pyregf and libregf version.""" + return pyregf.get_version() diff --git a/plaso/winreg/winpyregf_test.py b/plaso/winreg/winpyregf_test.py new file mode 100644 index 0000000..881551e --- /dev/null +++ b/plaso/winreg/winpyregf_test.py @@ -0,0 +1,61 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the pyregf Windows Registry back-end."""
+
+import unittest
+
+from plaso.winreg import test_lib
+from plaso.winreg import winpyregf
+
+
+class RegistryUnitTest(test_lib.WinRegTestCase):
+  """Tests for the pyregf Windows Registry back-end."""
+
+  def _KeyPathCompare(self, winreg_file, key_path):
+    """Retrieves a key from the file and checks if the key path matches.
+
+    Args:
+      winreg_file: the Windows Registry file (instance of WinPyregfFile).
+      key_path: the key path to retrieve and compare.
+    """
+    key = winreg_file.GetKeyByPath(key_path)
+    self.assertEqual(key.path, key_path)
+
+  def testListKeys(self):
+    test_file = self._GetTestFilePath(['NTUSER.DAT'])
+    file_entry = self._GetTestFileEntry(test_file)
+    winreg_file = winpyregf.WinRegistry(file_entry)
+    keys = list(winreg_file)
+
+    # Count the number of Registry keys in the hive.
+    self.assertEqual(len(keys), 1126)
+
+  def testWinPyregf(self):
+    test_file = self._GetTestFilePath(['NTUSER.DAT'])
+    file_entry = self._GetTestFileEntry(test_file)
+    winreg_file = winpyregf.WinPyregfFile()
+    winreg_file.Open(file_entry)
+
+    self._KeyPathCompare(winreg_file, u'\\')
+    self._KeyPathCompare(winreg_file, u'\\Printers')
+    self._KeyPathCompare(winreg_file, u'\\Printers\\Connections')
+    self._KeyPathCompare(winreg_file, u'\\Software')
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/plaso/winreg/winregistry.py b/plaso/winreg/winregistry.py
new file mode 100644
index 0000000..9999858
--- /dev/null
+++ b/plaso/winreg/winregistry.py
@@ -0,0 +1,149 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains the Windows Registry class."""
+
+from plaso.winreg import interface
+from plaso.winreg import winpyregf
+
+
+class WinRegistry(object):
+  """Class that provides a uniform way to access the Windows Registry."""
+
+  BACKEND_PYREGF = 1
+
+  _KNOWN_KEYS = {
+      'NTUSER.DAT': '\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer',
+      'SAM': '\\SAM\\Domains\\Account\\Users',
+      'SECURITY': '\\Policy\\PolAdtEv',
+      'SOFTWARE': '\\Microsoft\\Windows\\CurrentVersion\\App Paths',
+      'SYSTEM': '\\Select',
+  }
+
+  # TODO: this list is not finished yet and will need some more research.
+  # For now an empty string represents the root and None an unknown or
+  # not mounted path.
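+  # E.g. an NTUSER.DAT file is mounted as HKEY_CURRENT_USER and a REG.DAT
+  # file as the root.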
+  _FILENAME_MOUNTED_PATHS = {
+      'DEFAULT': None,
+      'NTUSER.DAT': 'HKEY_CURRENT_USER',
+      'NTUSER.MAN': None,
+      'REG.DAT': '',
+      'SAM': 'HKEY_LOCAL_MACHINE\\SAM',
+      'SECURITY': 'HKEY_LOCAL_MACHINE\\Security',
+      'SOFTWARE': 'HKEY_LOCAL_MACHINE\\Software',
+      'SYSTEM': 'HKEY_LOCAL_MACHINE\\System',
+      'SYSCACHE.HVE': None,
+      'SYSTEM.DAT': 'HKEY_LOCAL_MACHINE',
+      'USERDIFF': None,
+      'USERS.DAT': 'HKEY_USERS',
+      'USRCLASS.DAT': 'HKEY_CURRENT_USER\\Software\\Classes',
+  }
+
+  def __init__(self, backend=1):
+    """Initializes the Windows Registry.
+
+    Args:
+      backend: The back-end to use to read the Registry structures, the
+               default is 1 (pyregf).
+    """
+    self._backend = backend
+    self._files = {}
+
+  @classmethod
+  def GetMountedPath(cls, filename):
+    """Determines the mounted path based on the filename.
+
+    Args:
+      filename: The name of the Windows Registry file.
+
+    Returns:
+      The mounted path if successful or None otherwise.
+    """
+    return cls._FILENAME_MOUNTED_PATHS.get(filename.upper(), None)
+
+  def OpenFile(self, file_entry, codepage='cp1252'):
+    """Opens the file object based on the back-end.
+
+    Args:
+      file_entry: The file entry object.
+      codepage: Optional extended ASCII string codepage. The default is
+                cp1252.
+
+    Returns:
+      A Windows Registry file (instance of WinRegFile) if successful
+      or None otherwise.
+    """
+    winreg_file = None
+
+    if self._backend == self.BACKEND_PYREGF:
+      winreg_file = winpyregf.WinPyregfFile()
+
+    if winreg_file:
+      winreg_file.Open(file_entry, codepage=codepage)
+
+    return winreg_file
+
+  def MountFile(self, winreg_file, mounted_path):
+    """Mounts a file in the Registry.
+
+    Args:
+      winreg_file: The Windows Registry file (instance of WinRegFile).
+      mounted_path: The path of the key where the Windows Registry file
+                    is mounted.
+
+    Raises:
+      KeyError: if mounted path is already set.
+      ValueError: if mounted path is not set.
+    """
+    if not mounted_path:
+      raise ValueError(u'Missing mounted path value')
+
+    if mounted_path in self._files:
+      raise KeyError(u'Mounted path: {0:s} already set.'.format(mounted_path))
+
+    self._files[mounted_path] = winreg_file
+
+  def GetKeyByPath(self, path):
+    """Retrieves a specific key defined by the Registry path.
+
+    Args:
+      path: the Registry path, including the mounted path, of the key.
+
+    Returns:
+      The key (instance of WinRegKey) if available or None otherwise.
+    """
+    mounted_path = None
+    for mounted_path_candidate in self._files.keys():
+      if path.startswith(mounted_path_candidate):
+        mounted_path = mounted_path_candidate
+        break
+
+    if not mounted_path:
+      return None
+
+    winreg_file = self._files[mounted_path]
+
+    mounted_path_length = len(mounted_path)
+
+    if mounted_path.endswith(interface.WinRegKey.PATH_SEPARATOR):
+      mounted_path_length -= 1
+
+    path = path[mounted_path_length:]
+
+    if not winreg_file:
+      return None
+
+    winreg_key = winreg_file.GetKeyByPath(path)
+
+    # TODO: correct the path of the key for the mounted location.
+
+    return winreg_key
diff --git a/plaso/winreg/winregistry_test.py b/plaso/winreg/winregistry_test.py
new file mode 100644
index 0000000..69745b6
--- /dev/null
+++ b/plaso/winreg/winregistry_test.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file contains the tests for the Windows Registry library.""" + +import unittest + +from plaso.winreg import test_lib +from plaso.winreg import winregistry + + +class RegistryUnitTest(test_lib.WinRegTestCase): + """Tests for the Windows Registry library.""" + + def testMountFile(self): + """Tests mounting REGF files in the Registry.""" + registry = winregistry.WinRegistry( + winregistry.WinRegistry.BACKEND_PYREGF) + + test_file = self._GetTestFilePath(['SOFTWARE']) + file_entry = self._GetTestFileEntry(test_file) + winreg_file = registry.OpenFile(file_entry, codepage='cp1252') + + registry.MountFile(winreg_file, u'HKEY_LOCAL_MACHINE\\Software') + + test_file = self._GetTestFilePath(['NTUSER-WIN7.DAT']) + file_entry = self._GetTestFileEntry(test_file) + winreg_file = registry.OpenFile(file_entry, codepage='cp1252') + + with self.assertRaises(KeyError): + registry.MountFile(winreg_file, u'HKEY_LOCAL_MACHINE\\Software') + + registry.MountFile(winreg_file, u'HKEY_CURRENT_USER') + + +if __name__ == '__main__': + unittest.main() diff --git a/plasov1.2.0-rubanetra0.0.6-distribution.zip b/plasov1.2.0-rubanetra0.0.6-distribution.zip new file mode 100755 index 0000000..0c74a67 Binary files /dev/null and b/plasov1.2.0-rubanetra0.0.6-distribution.zip differ diff --git a/run_tests.py b/run_tests.py new file mode 100755 index 0000000..40ca9f0 --- /dev/null +++ b/run_tests.py @@ -0,0 +1,28 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Script to run the tests.""" + +import unittest +import sys + + +if __name__ == '__main__': + test_suite = unittest.TestLoader().discover('.', pattern='*_test.py') + test_results = unittest.TextTestRunner(verbosity=2).run(test_suite) + if not test_results.wasSuccessful(): + sys.exit(1) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..0263c1e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,37 @@ +[bdist_rpm] +release = 1 +packager = Plaso development team <log2timeline-dev@googlegroups.com> +doc_files = ACKNOWLEDGEMENTS + AUTHORS + LICENSE + README +build_requires = python-setuptools +requires = bencode + dfvfs + ipython + libbde-python + libesedb-python + libevt-python + libevtx-python + libewf-python + libfwsi-python + liblnk-python + libmsiecf-python + libolecf-python + libqcow-python + libregf-python + libsmdev-python + libsmraw-python + libvhdi-python + libvmdk-python + libvshadow-python + protobuf-python + pyparsing + python-contruct + python-dateutil + python-dpkt + python-psutil + python-six + pytsk3 + PyYAML + pytz diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..2edbc03 --- /dev/null +++ b/setup.py @@ -0,0 +1,123 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This is the setup file for the project. The standard setup rules apply: + + python setup.py build + sudo python setup.py install +""" + +import glob +import locale +import os +import sys + +import run_tests + +try: + from setuptools import find_packages, setup, Command +except ImportError: + from distutils.core import find_packages, setup, Command + +version_tuple = (sys.version_info[0], sys.version_info[1]) +if version_tuple < (2, 7) or version_tuple >= (3, 0): + print (u'Unsupported Python version: {0:s}, version 2.7 or higher and ' + u'lower than 3.x required.').format(sys.version) + sys.exit(1) + +# Change PYTHONPATH to include plaso so that we can get the version. +sys.path.insert(0, '.') + +import plaso + + +def GetTools(): + """List up all scripts that should be runable from the command line.""" + tools = [] + + tool_filenames = frozenset([ + u'image_export.py', + u'log2timeline.py', + u'pinfo.py', + u'plasm.py', + u'pprof.py', + u'preg.py', + u'pshell.py', + u'psort.py']) + + for filename in tool_filenames: + tools.append(os.path.join(u'plaso', u'frontend', filename)) + + tool_filenames = frozenset([ + u'plaso_extract_search_history.py']) + + for filename in tool_filenames: + tools.append(os.path.join(u'tools', filename)) + + return tools + + +class TestCommand(Command): + """Run tests, implementing an interface.""" + user_options = [] + + def initialize_options(self): + self._dir = os.getcwd() + + def finalize_options(self): + pass + + def run(self): + test_results = run_tests.RunTests() + +encoding = sys.stdin.encoding + +# Note that sys.stdin.encoding can be None. 
+if not encoding: + encoding = locale.getpreferredencoding() + +# Make sure the default encoding is set correctly otherwise +# setup.py sdist will fail to include filenames with Unicode characters. +reload(sys) +sys.setdefaultencoding(encoding) + +# Unicode in the description will break python-setuptools, hence +# "Plaso Langar Að Safna Öllu" was removed. +plaso_description = ( + u'plaso is a tool designed to extract timestamps from various files found ' + u'on a typical computer system(s) and aggregate them.') + +setup( + name='plaso', + version=plaso.GetVersion(), + description=plaso_description, + long_description=plaso_description, + license='Apache License, Version 2.0', + url='https://sites.google.com/a/kiddaland.net/plaso', + maintainer='Plaso development team', + maintainer_email='log2timeline-dev@googlegroups.com', + scripts=GetTools(), + cmdclass={'test': TestCommand}, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + ], + package_dir={'plaso': 'plaso'}, + packages=find_packages('.'), +) diff --git a/test_data/$II3DF3L.zip b/test_data/$II3DF3L.zip new file mode 100644 index 0000000..63f0764 Binary files /dev/null and b/test_data/$II3DF3L.zip differ diff --git a/test_data/1b4dd67f29cb1962.automaticDestinations-ms b/test_data/1b4dd67f29cb1962.automaticDestinations-ms new file mode 100644 index 0000000..ed184b5 Binary files /dev/null and b/test_data/1b4dd67f29cb1962.automaticDestinations-ms differ diff --git a/test_data/5afe4de1b92fc382.customDestinations-ms b/test_data/5afe4de1b92fc382.customDestinations-ms new file mode 100644 index 0000000..50a9f0a Binary files /dev/null and b/test_data/5afe4de1b92fc382.customDestinations-ms differ diff --git a/test_data/AccessProtectionLog.txt b/test_data/AccessProtectionLog.txt new file mode 100644 index 0000000..778c35d --- /dev/null +++ b/test_data/AccessProtectionLog.txt @@ -0,0 +1,14 @@ +9/27/2013 2:42:26 PM Blocked by Access Protection rule SOMEDOMAIN\someUser C:\Windows\System32\procexp64.exe C:\Program Files (x86)\McAfee\Common Framework\UdaterUI.exe Common Standard Protection:Prevent termination of McAfee processes Action blocked : Terminate +9/27/2013 2:42:39 PM Blocked by Access Protection rule SOMEDOMAIN\someUser C:\Windows\System32\procexp64.exe C:\Program Files (x86)\McAfee\Common Framework\FrameworkService.exe Common Standard Protection:Prevent termination of McAfee processes Action blocked : Terminate +9/27/2013 2:42:39 PM Blocked by Access Protection rule SOMEDOMAIN\someUser C:\Windows\System32\procexp64.exe C:\Program Files (x86)\McAfee\Common Framework\UdaterUI.exe Common Standard Protection:Prevent termination of McAfee processes Action blocked : Terminate +9/27/2013 2:42:40 PM Blocked by Access Protection rule SOMEDOMAIN\someUser C:\Windows\System32\procexp64.exe C:\Program Files (x86)\McAfee\Common Framework\McTray.exe Common Standard Protection:Prevent termination of McAfee processes Action blocked : Terminate +7/17/2013 1:49:34 PM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\System32\powercfg.exe \REGISTRY\USER\.DEFAULT\Software\Microsoft\Windows\CurrentVersion\Internet Settings\ZoneMap\AutoDetect Anti-spyware Standard Protection:Protect Internet Explorer favorites and settings Action blocked : Create +7/17/2013 1:49:34 PM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\System32\powercfg.exe 
C:\Windows\System32\config\systemprofile\AppData\Local\Microsoft\Windows\Temporary Internet Files\Content.IE5\index.dat Anti-virus Maximum Protection:Protect cached files from password and email address stealers Action blocked : Read +7/17/2013 1:53:31 PM Would be blocked by Access Protection rule (rule is currently not enforced) TheGrid\clu C:\Windows\system32\taskhost.exe C:\Windows\Temp\SDIAG_1893e055-45e8-4dda-a6fc-036616ec15c7\DiagPackage.dll Common Maximum Protection:Prevent creation of new executable files in the Windows folder Action blocked : Create +7/17/2013 1:53:32 PM Would be blocked by Access Protection rule (rule is currently not enforced) TheGrid\clu C:\Windows\System32\sdiagnhost.exe \REGISTRY\USER\S-1-5-21-218510691-2140962509-2033415169-18142\Software\Microsoft\Windows\CurrentVersion\Internet Settings\ZoneMap\AutoDetect Anti-spyware Standard Protection:Protect Internet Explorer favorites and settings Action blocked : Create +7/30/2013 10:06:05 AM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\TEMP\InstallPlugin_11_8_800_94.exe C:\Windows\Temp\{49568447-C9D4-4C19-942B-4472959CBC07}\fpb.tmp Anti-spyware Maximum Protection:Prevent all programs from running files from the Temp folder Action blocked : Execute +7/30/2013 10:06:06 AM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\TEMP\InstallPlugin_11_8_800_94.exe C:\Windows\Temp\{05007B29-A945-4346-8B04-7DD2F5453280}\InstallFlashPlayer.exe Common Maximum Protection:Prevent creation of new executable files in the Windows folder Action blocked : Create +7/30/2013 10:18:02 AM Would be blocked by port blocking rule (rule is currently not enforced) C:\Windows\SysWOW64\Macromed\Flash\FlashPlayerUpdateService.exe Common Maximum Protection:Prevent HTTP communication 23.56.2.70:443 +7/30/2013 10:22:48 AM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\system32\svchost.exe C:\Users\tron\AppData\Roaming\Mozilla\Firefox\prfD430.tmp Common Standard Protection:Protect Mozilla & FireFox files and settings Action blocked : Create +7/30/2013 10:22:48 AM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\system32\svchost.exe C:\Users\tron\AppData\Roaming\Mozilla\Firefox\Profiles\w77xlhgl.default\webapps\prfD432.tmp Common Standard Protection:Protect Mozilla & FireFox files and settings Action blocked : Delete +7/30/2013 10:22:48 AM Would be blocked by Access Protection rule (rule is currently not enforced) NT AUTHORITY\SYSTEM C:\Windows\system32\svchost.exe C:\Users\tron\AppData\Roaming\Mozilla\Firefox\Profiles\w77xlhgl.default\prfD431.tmp Common Standard Protection:Protect Mozilla & FireFox files and settings Action blocked : Create diff --git a/test_data/CMD.EXE-087B4001.pf b/test_data/CMD.EXE-087B4001.pf new file mode 100755 index 0000000..34fcc95 Binary files /dev/null and b/test_data/CMD.EXE-087B4001.pf differ diff --git a/test_data/Document.doc b/test_data/Document.doc new file mode 100644 index 0000000..148c657 Binary files /dev/null and b/test_data/Document.doc differ diff --git a/test_data/Document.docx b/test_data/Document.docx new file mode 100644 index 0000000..33965f2 Binary files /dev/null and b/test_data/Document.docx differ diff --git a/test_data/Extension Activity b/test_data/Extension Activity new file mode 100644 index 0000000..173a529 Binary files /dev/null and b/test_data/Extension Activity 
differ diff --git a/test_data/History b/test_data/History new file mode 100644 index 0000000..9fb1f4c Binary files /dev/null and b/test_data/History differ diff --git a/test_data/History.plist b/test_data/History.plist new file mode 100644 index 0000000..dcc2e69 Binary files /dev/null and b/test_data/History.plist differ diff --git a/test_data/INFO2 b/test_data/INFO2 new file mode 100644 index 0000000..410a968 Binary files /dev/null and b/test_data/INFO2 differ diff --git a/test_data/InstallHistory.plist b/test_data/InstallHistory.plist new file mode 100644 index 0000000..02394fc --- /dev/null +++ b/test_data/InstallHistory.plist @@ -0,0 +1,127 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<array> + <dict> + <key>date</key> + <date>2013-11-12T02:59:35Z</date> + <key>displayName</key> + <string>OS X</string> + <key>displayVersion</key> + <string>10.9 (13A603)</string> + <key>packageIdentifiers</key> + <array> + <string>com.apple.pkg.BaseSystemBinaries</string> + <string>com.apple.pkg.BaseSystemResources</string> + <string>com.apple.pkg.Essentials</string> + <string>com.apple.pkg.BSD</string> + <string>com.apple.pkg.JavaTools</string> + <string>com.apple.pkg.AdditionalEssentials</string> + <string>com.apple.pkg.AdditionalSpeechVoices</string> + <string>com.apple.pkg.AsianLanguagesSupport</string> + <string>com.apple.pkg.MediaFiles</string> + <string>com.apple.pkg.JavaEssentials</string> + <string>com.apple.pkg.OxfordDictionaries</string> + <string>com.apple.pkg.X11redirect</string> + <string>com.apple.pkg.OSInstall</string> + <string>com.apple.pkg.update.compatibility.2013.001</string> + </array> + <key>processName</key> + <string>OS X Installer</string> + </dict> + <dict> + <key>date</key> + <date>2013-12-28T04:38:11Z</date> + <key>displayName</key> + <string>VMware Tools</string> + <key>displayVersion</key> + <string>9.6.1</string> + <key>packageIdentifiers</key> + <array> + <string>com.vmware.tools.macos.pkg.files</string> + </array> + <key>processName</key> + <string>Installer</string> + </dict> + <dict> + <key>contentType</key> + <string>config-data</string> + <key>date</key> + <date>2013-12-28T12:14:43Z</date> + <key>displayName</key> + <string>XProtectPlistConfigData</string> + <key>displayVersion</key> + <string>1.0</string> + <key>packageIdentifiers</key> + <array> + <string>com.apple.pkg.XProtectPlistConfigData.2-45</string> + </array> + <key>processName</key> + <string>softwareupdated</string> + </dict> + <dict> + <key>contentType</key> + <string>config-data</string> + <key>date</key> + <date>2013-12-28T12:14:43Z</date> + <key>displayName</key> + <string>Core Services Application Configuration Data</string> + <key>displayVersion</key> + <string>1.0.25</string> + <key>packageIdentifiers</key> + <array> + <string>com.apple.pkg.CoreServicesAppConfigData.2-9</string> + </array> + <key>processName</key> + <string>softwareupdated</string> + </dict> + <dict> + <key>contentType</key> + <string>config-data</string> + <key>date</key> + <date>2013-12-28T12:14:43Z</date> + <key>displayName</key> + <string>Incompatible Kernel Extension Configuration Data</string> + <key>displayVersion</key> + <string>1.18.2</string> + <key>packageIdentifiers</key> + <array> + <string>com.apple.pkg.IncompatibleKextConfigData.3-9</string> + </array> + <key>processName</key> + <string>softwareupdated</string> + </dict> + <dict> + <key>contentType</key> + <string>config-data</string> + 
<key>date</key> + <date>2013-12-28T12:14:43Z</date> + <key>displayName</key> + <string>Chinese Word List Update</string> + <key>displayVersion</key> + <string>2.1</string> + <key>packageIdentifiers</key> + <array> + <string>com.apple.pkg.ChineseWordlistUpdate.5-17</string> + </array> + <key>processName</key> + <string>softwareupdated</string> + </dict> + <dict> + <key>contentType</key> + <string>config-data</string> + <key>date</key> + <date>2014-01-28T20:38:48Z</date> + <key>displayName</key> + <string>Chinese Word List Update</string> + <key>displayVersion</key> + <string>2.2</string> + <key>packageIdentifiers</key> + <array> + <string>com.apple.pkg.ChineseWordlistUpdate.5-30</string> + </array> + <key>processName</key> + <string>softwareupdated</string> + </dict> +</array> +</plist> diff --git a/test_data/NTUSER-CCLEANER.DAT b/test_data/NTUSER-CCLEANER.DAT new file mode 100755 index 0000000..ae33768 Binary files /dev/null and b/test_data/NTUSER-CCLEANER.DAT differ diff --git a/test_data/NTUSER-RunTests.DAT b/test_data/NTUSER-RunTests.DAT new file mode 100644 index 0000000..a7e65e1 Binary files /dev/null and b/test_data/NTUSER-RunTests.DAT differ diff --git a/test_data/NTUSER-WIN7.DAT b/test_data/NTUSER-WIN7.DAT new file mode 100644 index 0000000..1cda8b2 Binary files /dev/null and b/test_data/NTUSER-WIN7.DAT differ diff --git a/test_data/NTUSER.DAT b/test_data/NTUSER.DAT new file mode 100644 index 0000000..bb3b201 Binary files /dev/null and b/test_data/NTUSER.DAT differ diff --git a/test_data/NeroInfoTool.lnk b/test_data/NeroInfoTool.lnk new file mode 100755 index 0000000..366b5fe Binary files /dev/null and b/test_data/NeroInfoTool.lnk differ diff --git a/test_data/PING.EXE-B29F6629.pf b/test_data/PING.EXE-B29F6629.pf new file mode 100644 index 0000000..8ec39e4 Binary files /dev/null and b/test_data/PING.EXE-B29F6629.pf differ diff --git a/test_data/PLSRecall_Test.dat b/test_data/PLSRecall_Test.dat new file mode 100644 index 0000000..244e099 Binary files /dev/null and b/test_data/PLSRecall_Test.dat differ diff --git a/test_data/SAM b/test_data/SAM new file mode 100644 index 0000000..e11726d Binary files /dev/null and b/test_data/SAM differ diff --git a/test_data/SOFTWARE b/test_data/SOFTWARE new file mode 100644 index 0000000..868db1a Binary files /dev/null and b/test_data/SOFTWARE differ diff --git a/test_data/SOFTWARE-RunTests b/test_data/SOFTWARE-RunTests new file mode 100644 index 0000000..befb388 Binary files /dev/null and b/test_data/SOFTWARE-RunTests differ diff --git a/test_data/SYSTEM b/test_data/SYSTEM new file mode 100644 index 0000000..d9c28d2 Binary files /dev/null and b/test_data/SYSTEM differ diff --git a/test_data/Symantec.Log b/test_data/Symantec.Log new file mode 100644 index 0000000..6e52a08 --- /dev/null +++ b/test_data/Symantec.Log @@ -0,0 +1,8 @@ +2A0A1E011B21,7,3,8,SQLZZSERVEDD,SYSTEM,,,,,,,16777216,"New virus definition file loaded. 
Version: 141129w.",0,,0,,,,,0,,,,,,,,,,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,0,,,,, +2A0A1E0A2F1D,5,1,2,SQLZZSERVEDD,davnads,W32.Changeup!gen33,D:\Twinkle_Prod$\VM11 XXX\outside\test.exe.txt,5,3,14,256,39866436,"",0,,0,201 4 6 1 65542 0 0 0 0 0 0,0,54202,0,1,0,0,0,0,,0,2,4,0,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,0,,74ac79f8-d0c0-4065-acd7-27af2fc7ec4c,0,, +2A0A1E0A300E,46,1,2,SQLZZSERVEDD,davnads,W32.Changeup!gen23,D:\Twinkle_Prod$\VM11 XXX\outside\test.exe.txt,5,3,19,256,33554436,"",1354297652,,0,101 {A149F6F3-67B6-4D50-8A50-82E44938E96E} 0 1 W32.Changeup!gen23 2;0;13 0 0 74ac79f8-d0c0-4065-acd7-27af2fc7ec4c 0,0,54202,0,0,0,,,0,,0,0,1,0,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,999,,7v220ss1-3339-4e86-b06e-291ca5a2a87d,0,, +2A0A1E0A300E,5,1,2,SQLZZSERVEDD,davnads,W32.Changeup!gen23,D:\Twinkle_Prod$\VM11 XXX\outside\test.exe.txt,5,3,19,256,39866436,"",1354297652,,0,201 4 6 1 65542 0 0 0 0 0 0,1488777566,54202,0,1,0,0,0,0,,0,2,4,1488777566,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,0,,7v220ss1-3339-4e86-b06e-291ca5a2a87d,405536768,, +2A0A1E0A300E,50,1,2,SQLZZSERVEDD,davnads,,Internet browser temporary file cache,5,3,3,256,4,"",1354297652,,0,101 0 0 Browser Cache Remediation Delete Internet browser temporary file cache 2011 1 74ac79f8-d0c0-4065-acd7-27af2fc7ec4c 0,0,54202,0,0,0,,,0,,0,0,4,0,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,0,,7v220ss1-3339-4e86-b06e-291ca5a2a87d,0,, +2A0A1E0A300E,51,1,2,SQLZZSERVEDD,davnads,W32.Changeup!gen33,D:\Twinkle_Prod$\VM11 XXX\outside\test.exe.txt,5,3,19,256,37748804,"",1354297652,,0,101 {A149F6F3-67B6-4D50-8A50-82E44938E96E} 0 2 W32.Changeup!gen23 2;0;13 0 0 74ac79f8-d0c0-4065-acd7-27af2fc7ec4c 0,405536768,54202,0,0,0,,,0,,0,0,1,0,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,999,,7v220ss1-3339-4e86-b06e-291ca5a2a87d,405536768,, +2A0A1E0C0427,7,3,8,SQLZZSERVEDD,SYSTEM,,,,,,,16777216,"New virus definition file loaded. Version: 141130b.",0,,0,,,,,0,,,,,,,,,,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,0,,,,, +2A0A1E100602,7,3,8,SQLZZSERVEDD,SYSTEM,,,,,,,16777216,"New virus definition file loaded. 
Version: 122230r.",0,,0,,,,,0,,,,,,,,,,,{AAAAAAA-4F7F-4896-8C5A-5CEDFB6A9DC0},,,,BUSINES1,00:30:12:9C:58:3B,11.0.4000.290,,,,,,,,,,,,,,,,0,,,,, diff --git a/test_data/SysEvent.Evt b/test_data/SysEvent.Evt new file mode 100644 index 0000000..65bc5a5 Binary files /dev/null and b/test_data/SysEvent.Evt differ diff --git a/test_data/System.evtx b/test_data/System.evtx new file mode 100644 index 0000000..70f50e5 Binary files /dev/null and b/test_data/System.evtx differ diff --git a/test_data/TASKHOST.EXE-3AE259FC.pf b/test_data/TASKHOST.EXE-3AE259FC.pf new file mode 100755 index 0000000..0deb83b Binary files /dev/null and b/test_data/TASKHOST.EXE-3AE259FC.pf differ diff --git a/test_data/VolumeConfiguration.plist b/test_data/VolumeConfiguration.plist new file mode 100644 index 0000000..2482ef3 --- /dev/null +++ b/test_data/VolumeConfiguration.plist @@ -0,0 +1,58 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>ConfigurationCreationDate</key> + <date>2013-05-27T12:23:50Z</date> + <key>ConfigurationCreationVersion</key> + <string>Version 10.8 (Build 12A269)</string> + <key>ConfigurationModificationDate</key> + <date>2013-06-25T05:54:43Z</date> + <key>ConfigurationModificationVersion</key> + <string>Version 10.8.4 (Build 12E3067)</string> + <key>Exclusions</key> + <array/> + <key>Options</key> + <dict> + <key>ConfigurationType</key> + <string>Default</string> + </dict> + <key>Stores</key> + <dict> + <key>4D4BFEB5-7FE6-4033-AAAA-AAAABBBBCCCCDDDD</key> + <dict> + <key>CreationDate</key> + <date>2013-06-25T05:54:43Z</date> + <key>CreationVersion</key> + <string>Version 10.8.4 (Build 12E3067)</string> + <key>IndexVersion</key> + <integer>95</integer> + <key>PartialPath</key> + <string>/.MobileBackups</string> + <key>PolicyDate</key> + <date>2013-06-25T05:54:43Z</date> + <key>PolicyLevel</key> + <string>kMDConfigSearchLevelReadWrite</string> + <key>PolicyVersion</key> + <string>Version 10.8.4 (Build 12E3067)</string> + </dict> + <key>AA82EDD7-613C-45E6-82EF-AAAABBBBCCCCDDDD</key> + <dict> + <key>CreationDate</key> + <date>2013-05-27T12:27:36Z</date> + <key>CreationVersion</key> + <string>Version 10.8 (Build 12A269)</string> + <key>IndexVersion</key> + <integer>95</integer> + <key>PartialPath</key> + <string>/</string> + <key>PolicyDate</key> + <date>2013-05-27T12:27:37Z</date> + <key>PolicyLevel</key> + <string>kMDConfigSearchLevelReadWrite</string> + <key>PolicyVersion</key> + <string>Version 10.8 (Build 12A269)</string> + </dict> + </dict> +</dict> +</plist> diff --git a/test_data/WUAUCLT.EXE-830BCC14.pf b/test_data/WUAUCLT.EXE-830BCC14.pf new file mode 100644 index 0000000..72884e9 Binary files /dev/null and b/test_data/WUAUCLT.EXE-830BCC14.pf differ diff --git a/test_data/WebCacheV01.dat b/test_data/WebCacheV01.dat new file mode 100755 index 0000000..428c3e7 Binary files /dev/null and b/test_data/WebCacheV01.dat differ diff --git a/test_data/Windows.edb b/test_data/Windows.edb new file mode 100755 index 0000000..8a29ed3 Binary files /dev/null and b/test_data/Windows.edb differ diff --git a/test_data/__init__.py b/test_data/__init__.py new file mode 100644 index 0000000..1f5c4b3 --- /dev/null +++ b/test_data/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test_data/activity.sqlite b/test_data/activity.sqlite new file mode 100644 index 0000000..32a3446 Binary files /dev/null and b/test_data/activity.sqlite differ diff --git a/test_data/appfirewall.log b/test_data/appfirewall.log new file mode 100644 index 0000000..828b715 --- /dev/null +++ b/test_data/appfirewall.log @@ -0,0 +1,47 @@ +Nov 2 04:07:35 DarkTemplar-2.local socketfilterfw[112] <Error>: Logging: creating /var/log/appfirewall.log +Nov 2 04:07:35 DarkTemplar-2.local socketfilterfw[112] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 2 04:07:35 DarkTemplar-2.local socketfilterfw[112] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 2 13:02:08 DarkTemplar-2.local socketfilterfw[112] <Info>: Spotify: Allow TCP LISTEN (in:0 out:2) +Nov 2 17:05:39 DarkTemplar-2.local socketfilterfw[112] <Info>: Skype: Allow TCP LISTEN (in:0 out:1) +Nov 2 19:39:01 DarkTemplar-2.local socketfilterfw[112] <Info>: Skype: Allow TCP LISTEN (in:0 out:1) +Nov 3 03:07:29 DarkTemplar-2.local socketfilterfw[112] <Info>: Skype: Allow TCP LISTEN (in:0 out:1) +Nov 3 03:45:21 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 3 03:45:21 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 3 13:25:15 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 3 13:25:15 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 3 16:52:25 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 3 16:52:25 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 3 18:16:56 DarkTemplar-2.local socketfilterfw[87] <Info>: popup: Allow TCP LISTEN (in:0 out:1) +Nov 3 18:42:41 DarkTemplar-2.local socketfilterfw[87] <Info>: popup: Allow TCP LISTEN (in:0 out:1) +Nov 4 02:28:47 DarkTemplar-2.local socketfilterfw[87] <Info>: popup: Allow TCP LISTEN (in:0 out:1) +Nov 10 02:47:40 DarkTemplar-2.local socketfilterfw[87] <Info>: Spotify: Allow TCP LISTEN (in:0 out:2) +Nov 11 22:24:00 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP LISTEN (in:0 out:1) +Nov 11 22:44:00 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP CONNECT (in:1 out:0) +Nov 11 22:54:00 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP CONNECT (in:1 out:0) +Nov 11 23:16:01 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP CONNECT (in:1 out:0) +Nov 11 23:18:31 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP CONNECT (in:1 out:0) +Nov 12 18:45:06 DarkTemplar-2.local socketfilterfw[87] <Info>: popup: Allow TCP LISTEN (in:0 out:1) +Nov 12 18:52:17 DarkTemplar-2.local socketfilterfw[87] <Info>: popup: Allow TCP LISTEN (in:0 out:1) +Nov 12 18:56:25 DarkTemplar-2.local socketfilterfw[87] <Info>: popup: Allow TCP LISTEN (in:0 out:1) +Nov 12 22:11:32 DarkTemplar-2.local socketfilterfw[93] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 12 
22:11:32 DarkTemplar-2.local socketfilterfw[93] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 13 01:55:05 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 13 01:55:05 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 13 17:53:09 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 13 17:53:09 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 13 18:48:10 DarkTemplar-2.local socketfilterfw[87] <Info>: Spotify: Allow TCP LISTEN (in:0 out:2) +Nov 19 20:52:18 DarkTemplar-2.local socketfilterfw[87] <Info>: Spotify: Allow TCP LISTEN (in:0 out:2) +Nov 19 21:01:48 DarkTemplar-2.local socketfilterfw[87] <Info>: Spotify: Allow TCP LISTEN (in:0 out:2) +Nov 19 21:11:48 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP LISTEN (in:0 out:1) +Nov 19 21:31:18 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP CONNECT (in:1 out:0) +Nov 28 14:35:31 DarkTemplar-2.local socketfilterfw[87] <Info>: Skype: Allow TCP LISTEN (in:0 out:1) +Nov 28 20:35:01 DarkTemplar-2.local socketfilterfw[87] <Info>: iTunes: Allow TCP LISTEN (in:0 out:1) +Nov 29 22:17:30 DarkTemplar-2.local socketfilterfw[87] <Info>: Spotify: Allow TCP LISTEN (in:0 out:1) +Nov 29 22:18:29 --- last message repeated 1 time --- +Nov 30 20:37:03 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Nov 30 20:37:03 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Nov 30 23:37:34 DarkTemplar-2.local socketfilterfw[87] <Info>: iTunes: Allow TCP LISTEN (in:0 out:1) +Dec 1 22:13:23 DarkTemplar-2.local socketfilterfw[87] <Info>: Dropbox: Allow TCP LISTEN (in:0 out:1) +Dec 1 22:13:23 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Dec 31 23:59:23 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) +Jan 1 01:13:23 DarkTemplar-2.local socketfilterfw[87] <Info>: Notify: Allow TCP LISTEN (in:0 out:1) diff --git a/test_data/apple.bsm b/test_data/apple.bsm new file mode 100644 index 0000000..0502c8e Binary files /dev/null and b/test_data/apple.bsm differ diff --git a/test_data/applesystemlog.asl b/test_data/applesystemlog.asl new file mode 100644 index 0000000..0dbaa7a Binary files /dev/null and b/test_data/applesystemlog.asl differ diff --git a/test_data/application_usage.sqlite b/test_data/application_usage.sqlite new file mode 100644 index 0000000..9844069 Binary files /dev/null and b/test_data/application_usage.sqlite differ diff --git a/test_data/bencode_transmission b/test_data/bencode_transmission new file mode 100644 index 0000000..8c1c42c Binary files /dev/null and b/test_data/bencode_transmission differ diff --git a/test_data/bencode_utorrent b/test_data/bencode_utorrent new file mode 100755 index 0000000..8e1c2f5 Binary files /dev/null and b/test_data/bencode_utorrent differ diff --git a/test_data/chrome_cache/data_0 b/test_data/chrome_cache/data_0 new file mode 100644 index 0000000..874c934 Binary files /dev/null and b/test_data/chrome_cache/data_0 differ diff --git a/test_data/chrome_cache/data_1 b/test_data/chrome_cache/data_1 new file mode 100644 index 0000000..f44d18f Binary files /dev/null and b/test_data/chrome_cache/data_1 differ diff --git a/test_data/chrome_cache/data_2 b/test_data/chrome_cache/data_2 new file mode 100644 index 0000000..36c96dd Binary files /dev/null and b/test_data/chrome_cache/data_2 differ diff 
--git a/test_data/chrome_cache/data_3 b/test_data/chrome_cache/data_3 new file mode 100644 index 0000000..fb240ba Binary files /dev/null and b/test_data/chrome_cache/data_3 differ diff --git a/test_data/chrome_cache/f_000001 b/test_data/chrome_cache/f_000001 new file mode 100644 index 0000000..91b4253 Binary files /dev/null and b/test_data/chrome_cache/f_000001 differ diff --git a/test_data/chrome_cache/f_000002 b/test_data/chrome_cache/f_000002 new file mode 100644 index 0000000..ab2b6ce Binary files /dev/null and b/test_data/chrome_cache/f_000002 differ diff --git a/test_data/chrome_cache/f_000003 b/test_data/chrome_cache/f_000003 new file mode 100644 index 0000000..5a8c318 Binary files /dev/null and b/test_data/chrome_cache/f_000003 differ diff --git a/test_data/chrome_cache/f_000004 b/test_data/chrome_cache/f_000004 new file mode 100644 index 0000000..c06198d Binary files /dev/null and b/test_data/chrome_cache/f_000004 differ diff --git a/test_data/chrome_cache/f_000005 b/test_data/chrome_cache/f_000005 new file mode 100644 index 0000000..2abc3ed Binary files /dev/null and b/test_data/chrome_cache/f_000005 differ diff --git a/test_data/chrome_cache/f_000006 b/test_data/chrome_cache/f_000006 new file mode 100644 index 0000000..c1ab595 Binary files /dev/null and b/test_data/chrome_cache/f_000006 differ diff --git a/test_data/chrome_cache/f_000007 b/test_data/chrome_cache/f_000007 new file mode 100644 index 0000000..ee2ea79 Binary files /dev/null and b/test_data/chrome_cache/f_000007 differ diff --git a/test_data/chrome_cache/f_000008 b/test_data/chrome_cache/f_000008 new file mode 100644 index 0000000..66fed02 Binary files /dev/null and b/test_data/chrome_cache/f_000008 differ diff --git a/test_data/chrome_cache/f_000009 b/test_data/chrome_cache/f_000009 new file mode 100644 index 0000000..eb559ea Binary files /dev/null and b/test_data/chrome_cache/f_000009 differ diff --git a/test_data/chrome_cache/f_00000a b/test_data/chrome_cache/f_00000a new file mode 100644 index 0000000..ab71012 Binary files /dev/null and b/test_data/chrome_cache/f_00000a differ diff --git a/test_data/chrome_cache/f_00000b b/test_data/chrome_cache/f_00000b new file mode 100644 index 0000000..5c7e217 Binary files /dev/null and b/test_data/chrome_cache/f_00000b differ diff --git a/test_data/chrome_cache/f_00000c b/test_data/chrome_cache/f_00000c new file mode 100644 index 0000000..0dfecd5 Binary files /dev/null and b/test_data/chrome_cache/f_00000c differ diff --git a/test_data/chrome_cache/f_00000d b/test_data/chrome_cache/f_00000d new file mode 100644 index 0000000..eb214fe Binary files /dev/null and b/test_data/chrome_cache/f_00000d differ diff --git a/test_data/chrome_cache/f_00000e b/test_data/chrome_cache/f_00000e new file mode 100644 index 0000000..766ec71 Binary files /dev/null and b/test_data/chrome_cache/f_00000e differ diff --git a/test_data/chrome_cache/f_00000f b/test_data/chrome_cache/f_00000f new file mode 100644 index 0000000..f385caa Binary files /dev/null and b/test_data/chrome_cache/f_00000f differ diff --git a/test_data/chrome_cache/f_000010 b/test_data/chrome_cache/f_000010 new file mode 100644 index 0000000..deb7ec4 Binary files /dev/null and b/test_data/chrome_cache/f_000010 differ diff --git a/test_data/chrome_cache/f_000011 b/test_data/chrome_cache/f_000011 new file mode 100644 index 0000000..7df9805 Binary files /dev/null and b/test_data/chrome_cache/f_000011 differ diff --git a/test_data/chrome_cache/f_000012 b/test_data/chrome_cache/f_000012 new file mode 100644 index 0000000..85c1d10 
Binary files /dev/null and b/test_data/chrome_cache/f_000012 differ diff --git a/test_data/chrome_cache/f_000014 b/test_data/chrome_cache/f_000014 new file mode 100644 index 0000000..354701a Binary files /dev/null and b/test_data/chrome_cache/f_000014 differ diff --git a/test_data/chrome_cache/f_000015 b/test_data/chrome_cache/f_000015 new file mode 100644 index 0000000..0b0f5e5 Binary files /dev/null and b/test_data/chrome_cache/f_000015 differ diff --git a/test_data/chrome_cache/f_000016 b/test_data/chrome_cache/f_000016 new file mode 100644 index 0000000..c92a643 Binary files /dev/null and b/test_data/chrome_cache/f_000016 differ diff --git a/test_data/chrome_cache/f_000017 b/test_data/chrome_cache/f_000017 new file mode 100644 index 0000000..1a394c7 Binary files /dev/null and b/test_data/chrome_cache/f_000017 differ diff --git a/test_data/chrome_cache/f_000018 b/test_data/chrome_cache/f_000018 new file mode 100644 index 0000000..bf155cf Binary files /dev/null and b/test_data/chrome_cache/f_000018 differ diff --git a/test_data/chrome_cache/f_000019 b/test_data/chrome_cache/f_000019 new file mode 100644 index 0000000..5d66374 Binary files /dev/null and b/test_data/chrome_cache/f_000019 differ diff --git a/test_data/chrome_cache/f_00001a b/test_data/chrome_cache/f_00001a new file mode 100644 index 0000000..462af1a Binary files /dev/null and b/test_data/chrome_cache/f_00001a differ diff --git a/test_data/chrome_cache/f_00001b b/test_data/chrome_cache/f_00001b new file mode 100644 index 0000000..5cf171f Binary files /dev/null and b/test_data/chrome_cache/f_00001b differ diff --git a/test_data/chrome_cache/f_00001c b/test_data/chrome_cache/f_00001c new file mode 100644 index 0000000..9beb048 Binary files /dev/null and b/test_data/chrome_cache/f_00001c differ diff --git a/test_data/chrome_cache/f_00001d b/test_data/chrome_cache/f_00001d new file mode 100644 index 0000000..d2df9cc Binary files /dev/null and b/test_data/chrome_cache/f_00001d differ diff --git a/test_data/chrome_cache/f_00001e b/test_data/chrome_cache/f_00001e new file mode 100644 index 0000000..7720a97 Binary files /dev/null and b/test_data/chrome_cache/f_00001e differ diff --git a/test_data/chrome_cache/f_00001f b/test_data/chrome_cache/f_00001f new file mode 100644 index 0000000..5e5618d Binary files /dev/null and b/test_data/chrome_cache/f_00001f differ diff --git a/test_data/chrome_cache/f_000020 b/test_data/chrome_cache/f_000020 new file mode 100644 index 0000000..e95f464 Binary files /dev/null and b/test_data/chrome_cache/f_000020 differ diff --git a/test_data/chrome_cache/f_000021 b/test_data/chrome_cache/f_000021 new file mode 100644 index 0000000..e4c4c4d Binary files /dev/null and b/test_data/chrome_cache/f_000021 differ diff --git a/test_data/chrome_cache/f_000022 b/test_data/chrome_cache/f_000022 new file mode 100644 index 0000000..ef2843c Binary files /dev/null and b/test_data/chrome_cache/f_000022 differ diff --git a/test_data/chrome_cache/f_000023 b/test_data/chrome_cache/f_000023 new file mode 100644 index 0000000..e2fd69f Binary files /dev/null and b/test_data/chrome_cache/f_000023 differ diff --git a/test_data/chrome_cache/f_000024 b/test_data/chrome_cache/f_000024 new file mode 100644 index 0000000..d2cf34e Binary files /dev/null and b/test_data/chrome_cache/f_000024 differ diff --git a/test_data/chrome_cache/f_000025 b/test_data/chrome_cache/f_000025 new file mode 100644 index 0000000..0de03fd Binary files /dev/null and b/test_data/chrome_cache/f_000025 differ diff --git a/test_data/chrome_cache/f_000026 
b/test_data/chrome_cache/f_000026 new file mode 100644 index 0000000..b44db19 Binary files /dev/null and b/test_data/chrome_cache/f_000026 differ diff --git a/test_data/chrome_cache/f_000027 b/test_data/chrome_cache/f_000027 new file mode 100644 index 0000000..e671875 Binary files /dev/null and b/test_data/chrome_cache/f_000027 differ diff --git a/test_data/chrome_cache/f_000028 b/test_data/chrome_cache/f_000028 new file mode 100644 index 0000000..46a7476 --- /dev/null +++ b/test_data/chrome_cache/f_000028 @@ -0,0 +1,32 @@ +/* + * jQuery 1.2.6 - New Wave Javascript + * + * Copyright (c) 2008 John Resig (jquery.com) + * Dual licensed under the MIT (MIT-LICENSE.txt) + * and GPL (GPL-LICENSE.txt) licenses. + * + * $Date: 2008-11-28 18:23:57 -0600 (Fri, 28 Nov 2008) $ + * $Rev: 721596 $ + */ +(function(){var _jQuery=window.jQuery,_$=window.$;var jQuery=window.jQuery=window.$=function(selector,context){return new jQuery.fn.init(selector,context);};var quickExpr=/^[^<]*(<(.|\s)+>)[^>]*$|^#(\w+)$/,isSimple=/^.[^:#\[\.]*$/,undefined;jQuery.fn=jQuery.prototype={init:function(selector,context){selector=selector||document;if(selector.nodeType){this[0]=selector;this.length=1;return this;}if(typeof selector=="string"){var match=quickExpr.exec(selector);if(match&&(match[1]||!context)){if(match[1])selector=jQuery.clean([match[1]],context);else{var elem=document.getElementById(match[3]);if(elem){if(elem.id!=match[3])return jQuery().find(selector);return jQuery(elem);}selector=[];}}else +return jQuery(context).find(selector);}else if(jQuery.isFunction(selector))return jQuery(document)[jQuery.fn.ready?"ready":"load"](selector);return this.setArray(jQuery.makeArray(selector));},jquery:"1.2.6",size:function(){return this.length;},length:0,get:function(num){return num==undefined?jQuery.makeArray(this):this[num];},pushStack:function(elems){var ret=jQuery(elems);ret.prevObject=this;return ret;},setArray:function(elems){this.length=0;Array.prototype.push.apply(this,elems);return this;},each:function(callback,args){return jQuery.each(this,callback,args);},index:function(elem){var ret=-1;return jQuery.inArray(elem&&elem.jquery?elem[0]:elem,this);},attr:function(name,value,type){var options=name;if(name.constructor==String)if(value===undefined)return this[0]&&jQuery[type||"attr"](this[0],name);else{options={};options[name]=value;}return this.each(function(i){for(name in options)jQuery.attr(type?this.style:this,name,jQuery.prop(this,options[name],type,i,name));});},css:function(key,value){if((key=='width'||key=='height')&&parseFloat(value)<0)value=undefined;return this.attr(key,value,"curCSS");},text:function(text){if(typeof text!="object"&&text!=null)return this.empty().append((this[0]&&this[0].ownerDocument||document).createTextNode(text));var ret="";jQuery.each(text||this,function(){jQuery.each(this.childNodes,function(){if(this.nodeType!=8)ret+=this.nodeType!=1?this.nodeValue:jQuery.fn.text([this]);});});return ret;},wrapAll:function(html){if(this[0])jQuery(html,this[0].ownerDocument).clone().insertBefore(this[0]).map(function(){var elem=this;while(elem.firstChild)elem=elem.firstChild;return elem;}).append(this);return this;},wrapInner:function(html){return this.each(function(){jQuery(this).contents().wrapAll(html);});},wrap:function(html){return this.each(function(){jQuery(this).wrapAll(html);});},append:function(){return this.domManip(arguments,true,false,function(elem){if(this.nodeType==1)this.appendChild(elem);});},prepend:function(){return 
this.domManip(arguments,true,true,function(elem){if(this.nodeType==1)this.insertBefore(elem,this.firstChild);});},before:function(){return this.domManip(arguments,false,false,function(elem){this.parentNode.insertBefore(elem,this);});},after:function(){return this.domManip(arguments,false,true,function(elem){this.parentNode.insertBefore(elem,this.nextSibling);});},end:function(){return this.prevObject||jQuery([]);},find:function(selector){var elems=jQuery.map(this,function(elem){return jQuery.find(selector,elem);});return this.pushStack(/[^+>] [^+>]/.test(selector)||selector.indexOf("..")>-1?jQuery.unique(elems):elems);},clone:function(events){var ret=this.map(function(){if(jQuery.browser.msie&&!jQuery.isXMLDoc(this)){var clone=this.cloneNode(true),container=document.createElement("div");container.appendChild(clone);return jQuery.clean([container.innerHTML])[0];}else +return this.cloneNode(true);});var clone=ret.find("*").andSelf().each(function(){if(this[expando]!=undefined)this[expando]=null;});if(events===true)this.find("*").andSelf().each(function(i){if(this.nodeType==3)return;var events=jQuery.data(this,"events");for(var type in events)for(var handler in events[type])jQuery.event.add(clone[i],type,events[type][handler],events[type][handler].data);});return ret;},filter:function(selector){return this.pushStack(jQuery.isFunction(selector)&&jQuery.grep(this,function(elem,i){return selector.call(elem,i);})||jQuery.multiFilter(selector,this));},not:function(selector){if(selector.constructor==String)if(isSimple.test(selector))return this.pushStack(jQuery.multiFilter(selector,this,true));else +selector=jQuery.multiFilter(selector,this);var isArrayLike=selector.length&&selector[selector.length-1]!==undefined&&!selector.nodeType;return this.filter(function(){return isArrayLike?jQuery.inArray(this,selector)<0:this!=selector;});},add:function(selector){return this.pushStack(jQuery.unique(jQuery.merge(this.get(),typeof selector=='string'?jQuery(selector):jQuery.makeArray(selector))));},is:function(selector){return!!selector&&jQuery.multiFilter(selector,this).length>0;},hasClass:function(selector){return this.is("."+selector);},val:function(value){if(value==undefined){if(this.length){var elem=this[0];if(jQuery.nodeName(elem,"select")){var index=elem.selectedIndex,values=[],options=elem.options,one=elem.type=="select-one";if(index<0)return null;for(var i=one?index:0,max=one?index+1:options.length;i<max;i++){var option=options[i];if(option.selected){value=jQuery.browser.msie&&!option.attributes.value.specified?option.text:option.value;if(one)return value;values.push(value);}}return values;}else +return(this[0].value||"").replace(/\r/g,"");}return undefined;}if(value.constructor==Number)value+='';return this.each(function(){if(this.nodeType!=1)return;if(value.constructor==Array&&/radio|checkbox/.test(this.type))this.checked=(jQuery.inArray(this.value,value)>=0||jQuery.inArray(this.name,value)>=0);else if(jQuery.nodeName(this,"select")){var values=jQuery.makeArray(value);jQuery("option",this).each(function(){this.selected=(jQuery.inArray(this.value,values)>=0||jQuery.inArray(this.text,values)>=0);});if(!values.length)this.selectedIndex=-1;}else +this.value=value;});},html:function(value){return value==undefined?(this[0]?this[0].innerHTML:null):this.empty().append(value);},replaceWith:function(value){return this.after(value).remove();},eq:function(i){return this.slice(i,i+1);},slice:function(){return this.pushStack(Array.prototype.slice.apply(this,arguments));},map:function(callback){return 
this.pushStack(jQuery.map(this,function(elem,i){return callback.call(elem,i,elem);}));},andSelf:function(){return this.add(this.prevObject);},data:function(key,value){var parts=key.split(".");parts[1]=parts[1]?"."+parts[1]:"";if(value===undefined){var data=this.triggerHandler("getData"+parts[1]+"!",[parts[0]]);if(data===undefined&&this.length)data=jQuery.data(this[0],key);return data===undefined&&parts[1]?this.data(parts[0]):data;}else
+return this.trigger("setData"+parts[1]+"!",[parts[0],value]).each(function(){jQuery.data(this,key,value);});},removeData:function(key){return this.each(function(){jQuery.removeData(this,key);});},domManip:function(args,table,reverse,callback){var clone=this.length>1,elems;return this.each(function(){if(!elems){elems=jQuery.clean(args,this.ownerDocument);if(reverse)elems.reverse();}var obj=this;if(table&&jQuery.nodeName(this,"table")&&jQuery.nodeName(elems[0],"tr"))obj=this.getElementsByTagName("tbody")[0]||this.appendChild(this.ownerDocument.createElement("tbody"));var scripts=jQuery([]);jQuery.each(elems,function(){var elem=clone?jQuery(this).clone(true)[0]:this;if(jQuery.nodeName(elem,"script"))scripts=scripts.add(elem);else{if(elem.nodeType==1)scripts=scripts.add(jQuery("script",elem).remove());callback.call(obj,elem);}});scripts.each(evalScript);});}};jQuery.fn.init.prototype=jQuery.fn;function evalScript(i,elem){if(elem.src)jQuery.ajax({url:elem.src,async:false,dataType:"script"});else
+jQuery.globalEval(elem.text||elem.textContent||elem.innerHTML||"");if(elem.parentNode)elem.parentNode.removeChild(elem);}function now(){return+new Date;}jQuery.extend=jQuery.fn.extend=function(){var target=arguments[0]||{},i=1,length=arguments.length,deep=false,options;if(target.constructor==Boolean){deep=target;target=arguments[1]||{};i=2;}if(typeof target!="object"&&typeof target!="function")target={};if(length==i){target=this;--i;}for(;i<length;i++)if((options=arguments[i])!=null)for(var name in options){var src=target[name],copy=options[name];if(target===copy)continue;if(deep&&copy&&typeof copy=="object"&&!copy.nodeType)target[name]=jQuery.extend(deep,src||(copy.length!=null?[]:{}),copy);else if(copy!==undefined)target[name]=copy;}return target;};var expando="jQuery"+now(),uuid=0,windowData={},exclude=/z-?index|font-?weight|opacity|zoom|line-?height/i,defaultView=document.defaultView||{};jQuery.extend({noConflict:function(deep){window.$=_$;if(deep)window.jQuery=_jQuery;return jQuery;},isFunction:function(fn){return!!fn&&typeof fn!="string"&&!fn.nodeName&&fn.constructor!=Array&&/^[\s[]?function/.test(fn+"");},isXMLDoc:function(elem){return elem.documentElement&&!elem.body||elem.tagName&&elem.ownerDocument&&!elem.ownerDocument.body;},globalEval:function(data){data=jQuery.trim(data);if(data){var head=document.getElementsByTagName("head")[0]||document.documentElement,script=document.createElement("script");script.type="text/javascript";if(jQuery.browser.msie)script.text=data;else
+script.appendChild(document.createTextNode(data));head.insertBefore(script,head.firstChild);head.removeChild(script);}},nodeName:function(elem,name){return elem.nodeName&&elem.nodeName.toUpperCase()==name.toUpperCase();},cache:{},data:function(elem,name,data){elem=elem==window?windowData:elem;var id=elem[expando];if(!id)id=elem[expando]=++uuid;if(name&&!jQuery.cache[id])jQuery.cache[id]={};if(data!==undefined)jQuery.cache[id][name]=data;return name?jQuery.cache[id][name]:id;},removeData:function(elem,name){elem=elem==window?windowData:elem;var id=elem[expando];if(name){if(jQuery.cache[id]){delete
jQuery.cache[id][name];name="";for(name in jQuery.cache[id])break;if(!name)jQuery.removeData(elem);}}else{try{delete elem[expando];}catch(e){if(elem.removeAttribute)elem.removeAttribute(expando);}delete jQuery.cache[id];}},each:function(object,callback,args){var name,i=0,length=object.length;if(args){if(length==undefined){for(name in object)if(callback.apply(object[name],args)===false)break;}else +for(;i<length;)if(callback.apply(object[i++],args)===false)break;}else{if(length==undefined){for(name in object)if(callback.call(object[name],name,object[name])===false)break;}else +for(var value=object[0];i<length&&callback.call(value,i,value)!==false;value=object[++i]){}}return object;},prop:function(elem,value,type,i,name){if(jQuery.isFunction(value))value=value.call(elem,i);return value&&value.constructor==Number&&type=="curCSS"&&!exclude.test(name)?value+"px":value;},className:{add:function(elem,classNames){jQuery.each((classNames||"").split(/\s+/),function(i,className){if(elem.nodeType==1&&!jQuery.className.has(elem.className,className))elem.className+=(elem.className?" ":"")+className;});},remove:function(elem,classNames){if(elem.nodeType==1)elem.className=classNames!=undefined?jQuery.grep(elem.className.split(/\s+/),function(className){return!jQuery.className.has(classNames,className);}).join(" "):"";},has:function(elem,className){return jQuery.inArray(className,(elem.className||elem).toString().split(/\s+/))>-1;}},swap:function(elem,options,callback){var old={};for(var name in options){old[name]=elem.style[name];elem.style[name]=options[name];}callback.call(elem);for(var name in options)elem.style[name]=old[name];},css:function(elem,name,force){if(name=="width"||name=="height"){var val,props={position:"absolute",visibility:"hidden",display:"block"},which=name=="width"?["Left","Right"]:["Top","Bottom"];function getWH(){val=name=="width"?elem.offsetWidth:elem.offsetHeight;var padding=0,border=0;jQuery.each(which,function(){padding+=parseFloat(jQuery.curCSS(elem,"padding"+this,true))||0;border+=parseFloat(jQuery.curCSS(elem,"border"+this+"Width",true))||0;});val-=Math.round(padding+border);}if(jQuery(elem).is(":visible"))getWH();else +jQuery.swap(elem,props,getWH);return Math.max(0,val);}return jQuery.curCSS(elem,name,force);},curCSS:function(elem,name,force){var ret,style=elem.style;function color(elem){if(!jQuery.browser.safari)return false;var ret=defaultView.getComputedStyle(elem,null);return!ret||ret.getPropertyValue("color")=="";}if(name=="opacity"&&jQuery.browser.msie){ret=jQuery.attr(style,"opacity");return ret==""?"1":ret;}if(jQuery.browser.opera&&name=="display"){var save=style.outline;style.outline="0 solid black";style.outline=save;}if(name.match(/float/i))name=styleFloat;if(!force&&style&&style[name])ret=style[name];else if(defaultView.getComputedStyle){if(name.match(/float/i))name="float";name=name.replace(/([A-Z])/g,"-$1").toLowerCase();var computedStyle=defaultView.getComputedStyle(elem,null);if(computedStyle&&!color(elem))ret=computedStyle.getPropertyValue(name);else{var swap=[],stack=[],a=elem,i=0;for(;a&&color(a);a=a.parentNode)stack.unshift(a);for(;i<stack.length;i++)if(color(stack[i])){swap[i]=stack[i].style.display;stack[i].style.display="block";}ret=name=="display"&&swap[stack.length-1]!=null?"none":(computedStyle&&computedStyle.getPropertyValue(name))||"";for(i=0;i<swap.length;i++)if(swap[i]!=null)stack[i].style.display=swap[i];}if(name=="opacity"&&ret=="")ret="1";}else if(elem.currentStyle){var camelCase=name.replace(/\-(\w)/g,function(all,letter){return 
letter.toUpperCase();});ret=elem.currentStyle[name]||elem.currentStyle[camelCase];if(!/^\d+(px)?$/i.test(ret)&&/^\d/.test(ret)){var left=style.left,rsLeft=elem.runtimeStyle.left;elem.runtimeStyle.left=elem.currentStyle.left;style.left=ret||0;ret=style.pixelLeft+"px";style.left=left;elem.runtimeStyle.left=rsLeft;}}return ret;},clean:function(elems,context){var ret=[];context=context||document;if(typeof context.createElement=='undefined')context=context.ownerDocument||context[0]&&context[0].ownerDocument||document;jQuery.each(elems,function(i,elem){if(!elem)return;if(elem.constructor==Number)elem+='';if(typeof elem=="string"){elem=elem.replace(/(<(\w+)[^>]*?)\/>/g,function(all,front,tag){return tag.match(/^(abbr|br|col|img|input|link|meta|param|hr|area|embed)$/i)?all:front+"></"+tag+">";});var tags=jQuery.trim(elem).toLowerCase(),div=context.createElement("div");var wrap=!tags.indexOf("<opt")&&[1,"<select multiple='multiple'>","</select>"]||!tags.indexOf("<leg")&&[1,"<fieldset>","</fieldset>"]||tags.match(/^<(thead|tbody|tfoot|colg|cap)/)&&[1,"<table>","</table>"]||!tags.indexOf("<tr")&&[2,"<table><tbody>","</tbody></table>"]||(!tags.indexOf("<td")||!tags.indexOf("<th"))&&[3,"<table><tbody><tr>","</tr></tbody></table>"]||!tags.indexOf("<col")&&[2,"<table><tbody></tbody><colgroup>","</colgroup></table>"]||jQuery.browser.msie&&[1,"div<div>","</div>"]||[0,"",""];div.innerHTML=wrap[1]+elem+wrap[2];while(wrap[0]--)div=div.lastChild;if(jQuery.browser.msie){var tbody=!tags.indexOf("<table")&&tags.indexOf("<tbody")<0?div.firstChild&&div.firstChild.childNodes:wrap[1]=="<table>"&&tags.indexOf("<tbody")<0?div.childNodes:[];for(var j=tbody.length-1;j>=0;--j)if(jQuery.nodeName(tbody[j],"tbody")&&!tbody[j].childNodes.length)tbody[j].parentNode.removeChild(tbody[j]);if(/^\s/.test(elem))div.insertBefore(context.createTextNode(elem.match(/^\s*/)[0]),div.firstChild);}elem=jQuery.makeArray(div.childNodes);}if(elem.length===0&&(!jQuery.nodeName(elem,"form")&&!jQuery.nodeName(elem,"select")))return;if(elem[0]==undefined||jQuery.nodeName(elem,"form")||elem.options)ret.push(elem);else
+ret=jQuery.merge(ret,elem);});return ret;},attr:function(elem,name,value){if(!elem||elem.nodeType==3||elem.nodeType==8)return undefined;var notxml=!jQuery.isXMLDoc(elem),set=value!==undefined,msie=jQuery.browser.msie;name=notxml&&jQuery.props[name]||name;if(elem.tagName){var special=/href|src|style/.test(name);if(name=="selected"&&jQuery.browser.safari)elem.parentNode.selectedIndex;if(name in elem&&notxml&&!special){if(set){if(name=="type"&&jQuery.nodeName(elem,"input")&&elem.parentNode)throw"type property can't be changed";elem[name]=value;}if(jQuery.nodeName(elem,"form")&&elem.getAttributeNode(name))return elem.getAttributeNode(name).nodeValue;return elem[name];}if(msie&&notxml&&name=="style")return jQuery.attr(elem.style,"cssText",value);if(set)elem.setAttribute(name,""+value);var attr=msie&&notxml&&special?elem.getAttribute(name,2):elem.getAttribute(name);return attr===null?undefined:attr;}if(msie&&name=="opacity"){if(set){elem.zoom=1;elem.filter=(elem.filter||"").replace(/alpha\([^)]*\)/,"")+(parseInt(value)+''=="NaN"?"":"alpha(opacity="+value*100+")");}return elem.filter&&elem.filter.indexOf("opacity=")>=0?(parseFloat(elem.filter.match(/opacity=([^)]*)/)[1])/100)+'':"";}name=name.replace(/-([a-z])/ig,function(all,letter){return letter.toUpperCase();});if(set)elem[name]=value;return elem[name];},trim:function(text){return(text||"").replace(/^\s+|\s+$/g,"");},makeArray:function(array){var ret=[];if(array!=null){var
i=array.length;if(i==null||array.split||array.setInterval||array.call)ret[0]=array;else +while(i)ret[--i]=array[i];}return ret;},inArray:function(elem,array){for(var i=0,length=array.length;i<length;i++)if(array[i]===elem)return i;return-1;},merge:function(first,second){var i=0,elem,pos=first.length;if(jQuery.browser.msie){while(elem=second[i++])if(elem.nodeType!=8)first[pos++]=elem;}else +while(elem=second[i++])first[pos++]=elem;return first;},unique:function(array){var ret=[],done={};try{for(var i=0,length=array.length;i<length;i++){var id=jQuery.data(array[i]);if(!done[id]){done[id]=true;ret.push(array[i]);}}}catch(e){ret=array;}return ret;},grep:function(elems,callback,inv){var ret=[];for(var i=0,length=elems.length;i<length;i++)if(!inv!=!callback(elems[i],i))ret.push(elems[i]);return ret;},map:function(elems,callback){var ret=[];for(var i=0,length=elems.length;i<length;i++){var value=callback(elems[i],i);if(value!=null)ret[ret.length]=value;}return ret.concat.apply([],ret);}});var userAgent=navigator.userAgent.toLowerCase();jQuery.browser={version:(userAgent.match(/.+(?:rv|it|ra|ie)[\/: ]([\d.]+)/)||[])[1],safari:/webkit/.test(userAgent),opera:/opera/.test(userAgent),msie:/msie/.test(userAgent)&&!/opera/.test(userAgent),mozilla:/mozilla/.test(userAgent)&&!/(compatible|webkit)/.test(userAgent)};var styleFloat=jQuery.browser.msie?"styleFloat":"cssFloat";jQuery.extend({boxModel:!jQuery.browser.msie||document.compatMode=="CSS1Compat",props:{"for":"htmlFor","class":"className","float":styleFloat,cssFloat:styleFloat,styleFloat:styleFloat,readonly:"readOnly",maxlength:"maxLength",cellspacing:"cellSpacing"}});jQuery.each({parent:function(elem){return elem.parentNode;},parents:function(elem){return jQuery.dir(elem,"parentNode");},next:function(elem){return jQuery.nth(elem,2,"nextSibling");},prev:function(elem){return jQuery.nth(elem,2,"previousSibling");},nextAll:function(elem){return jQuery.dir(elem,"nextSibling");},prevAll:function(elem){return jQuery.dir(elem,"previousSibling");},siblings:function(elem){return jQuery.sibling(elem.parentNode.firstChild,elem);},children:function(elem){return jQuery.sibling(elem.firstChild);},contents:function(elem){return jQuery.nodeName(elem,"iframe")?elem.contentDocument||elem.contentWindow.document:jQuery.makeArray(elem.childNodes);}},function(name,fn){jQuery.fn[name]=function(selector){var ret=jQuery.map(this,fn);if(selector&&typeof selector=="string")ret=jQuery.multiFilter(selector,ret);return this.pushStack(jQuery.unique(ret));};});jQuery.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(name,original){jQuery.fn[name]=function(){var args=arguments;return this.each(function(){for(var 
i=0,length=args.length;i<length;i++)jQuery(args[i])[original](this);});};});jQuery.each({removeAttr:function(name){jQuery.attr(this,name,"");if(this.nodeType==1)this.removeAttribute(name);},addClass:function(classNames){jQuery.className.add(this,classNames);},removeClass:function(classNames){jQuery.className.remove(this,classNames);},toggleClass:function(classNames){jQuery.className[jQuery.className.has(this,classNames)?"remove":"add"](this,classNames);},remove:function(selector){if(!selector||jQuery.filter(selector,[this]).r.length){jQuery("*",this).add(this).each(function(){jQuery.event.remove(this);jQuery.removeData(this);});if(this.parentNode)this.parentNode.removeChild(this);}},empty:function(){jQuery(">*",this).remove();while(this.firstChild)this.removeChild(this.firstChild);}},function(name,fn){jQuery.fn[name]=function(){return this.each(fn,arguments);};});jQuery.each(["Height","Width"],function(i,name){var type=name.toLowerCase();jQuery.fn[type]=function(size){return this[0]==window?jQuery.browser.opera&&document.body["client"+name]||jQuery.browser.safari&&window["inner"+name]||document.compatMode=="CSS1Compat"&&document.documentElement["client"+name]||document.body["client"+name]:this[0]==document?Math.max(Math.max(document.body["scroll"+name],document.documentElement["scroll"+name]),Math.max(document.body["offset"+name],document.documentElement["offset"+name])):size==undefined?(this.length?jQuery.css(this[0],type):null):this.css(type,size.constructor==String?size:size+"px");};});function num(elem,prop){return elem[0]&&parseInt(jQuery.curCSS(elem[0],prop,true),10)||0;}var chars=jQuery.browser.safari&&parseInt(jQuery.browser.version)<417?"(?:[\\w*_-]|\\\\.)":"(?:[\\w\u0128-\uFFFF*_-]|\\\\.)",quickChild=new RegExp("^>\\s*("+chars+"+)"),quickID=new RegExp("^("+chars+"+)(#)("+chars+"+)"),quickClass=new RegExp("^([#.]?)("+chars+"*)");jQuery.extend({expr:{"":function(a,i,m){return m[2]=="*"||jQuery.nodeName(a,m[2]);},"#":function(a,i,m){return a.getAttribute("id")==m[2];},":":{lt:function(a,i,m){return i<m[3]-0;},gt:function(a,i,m){return i>m[3]-0;},nth:function(a,i,m){return m[3]-0==i;},eq:function(a,i,m){return m[3]-0==i;},first:function(a,i){return i==0;},last:function(a,i,m,r){return i==r.length-1;},even:function(a,i){return i%2==0;},odd:function(a,i){return i%2;},"first-child":function(a){return a.parentNode.getElementsByTagName("*")[0]==a;},"last-child":function(a){return jQuery.nth(a.parentNode.lastChild,1,"previousSibling")==a;},"only-child":function(a){return!jQuery.nth(a.parentNode.lastChild,2,"previousSibling");},parent:function(a){return a.firstChild;},empty:function(a){return!a.firstChild;},contains:function(a,i,m){return(a.textContent||a.innerText||jQuery(a).text()||"").indexOf(m[3])>=0;},visible:function(a){return"hidden"!=a.type&&jQuery.css(a,"display")!="none"&&jQuery.css(a,"visibility")!="hidden";},hidden:function(a){return"hidden"==a.type||jQuery.css(a,"display")=="none"||jQuery.css(a,"visibility")=="hidden";},enabled:function(a){return!a.disabled;},disabled:function(a){return a.disabled;},checked:function(a){return a.checked;},selected:function(a){return 
a.selected||jQuery.attr(a,"selected");},text:function(a){return"text"==a.type;},radio:function(a){return"radio"==a.type;},checkbox:function(a){return"checkbox"==a.type;},file:function(a){return"file"==a.type;},password:function(a){return"password"==a.type;},submit:function(a){return"submit"==a.type;},image:function(a){return"image"==a.type;},reset:function(a){return"reset"==a.type;},button:function(a){return"button"==a.type||jQuery.nodeName(a,"button");},input:function(a){return/input|select|textarea|button/i.test(a.nodeName);},has:function(a,i,m){return jQuery.find(m[3],a).length;},header:function(a){return/h\d/i.test(a.nodeName);},animated:function(a){return jQuery.grep(jQuery.timers,function(fn){return a==fn.elem;}).length;}}},parse:[/^(\[) *@?([\w-]+) *([!*$^~=]*) *('?"?)(.*?)\4 *\]/,/^(:)([\w-]+)\("?'?(.*?(\(.*?\))?[^(]*?)"?'?\)/,new RegExp("^([:.#]*)("+chars+"+)")],multiFilter:function(expr,elems,not){var old,cur=[];while(expr&&expr!=old){old=expr;var f=jQuery.filter(expr,elems,not);expr=f.t.replace(/^\s*,\s*/,"");cur=not?elems=f.r:jQuery.merge(cur,f.r);}return cur;},find:function(t,context){if(typeof t!="string")return[t];if(context&&context.nodeType!=1&&context.nodeType!=9)return[];context=context||document;var ret=[context],done=[],last,nodeName;while(t&&last!=t){var r=[];last=t;t=jQuery.trim(t);var foundToken=false,re=quickChild,m=re.exec(t);if(m){nodeName=m[1].toUpperCase();for(var i=0;ret[i];i++)for(var c=ret[i].firstChild;c;c=c.nextSibling)if(c.nodeType==1&&(nodeName=="*"||c.nodeName.toUpperCase()==nodeName))r.push(c);ret=r;t=t.replace(re,"");if(t.indexOf(" ")==0)continue;foundToken=true;}else{re=/^([>+~])\s*(\w*)/i;if((m=re.exec(t))!=null){r=[];var merge={};nodeName=m[2].toUpperCase();m=m[1];for(var j=0,rl=ret.length;j<rl;j++){var n=m=="~"||m=="+"?ret[j].nextSibling:ret[j].firstChild;for(;n;n=n.nextSibling)if(n.nodeType==1){var id=jQuery.data(n);if(m=="~"&&merge[id])break;if(!nodeName||n.nodeName.toUpperCase()==nodeName){if(m=="~")merge[id]=true;r.push(n);}if(m=="+")break;}}ret=r;t=jQuery.trim(t.replace(re,""));foundToken=true;}}if(t&&!foundToken){if(!t.indexOf(",")){if(context==ret[0])ret.shift();done=jQuery.merge(done,ret);r=ret=[context];t=" "+t.substr(1,t.length);}else{var re2=quickID;var m=re2.exec(t);if(m){m=[0,m[2],m[3],m[1]];}else{re2=quickClass;m=re2.exec(t);}m[2]=m[2].replace(/\\/g,"");var elem=ret[ret.length-1];if(m[1]=="#"&&elem&&elem.getElementById&&!jQuery.isXMLDoc(elem)){var oid=elem.getElementById(m[2]);if((jQuery.browser.msie||jQuery.browser.opera)&&oid&&typeof oid.id=="string"&&oid.id!=m[2])oid=jQuery('[@id="'+m[2]+'"]',elem)[0];ret=r=oid&&(!m[3]||jQuery.nodeName(oid,m[3]))?[oid]:[];}else{for(var i=0;ret[i];i++){var tag=m[1]=="#"&&m[3]?m[3]:m[1]!=""||m[0]==""?"*":m[2];if(tag=="*"&&ret[i].nodeName.toLowerCase()=="object")tag="param";r=jQuery.merge(r,ret[i].getElementsByTagName(tag));}if(m[1]==".")r=jQuery.classFilter(r,m[2]);if(m[1]=="#"){var tmp=[];for(var i=0;r[i];i++)if(r[i].getAttribute("id")==m[2]){tmp=[r[i]];break;}r=tmp;}ret=r;}t=t.replace(re2,"");}}if(t){var val=jQuery.filter(t,r);ret=r=val.r;t=jQuery.trim(val.t);}}if(t)ret=[];if(ret&&context==ret[0])ret.shift();done=jQuery.merge(done,ret);return done;},classFilter:function(r,m,not){m=" "+m+" ";var tmp=[];for(var i=0;r[i];i++){var pass=(" "+r[i].className+" ").indexOf(m)>=0;if(!not&&pass||not&&!pass)tmp.push(r[i]);}return tmp;},filter:function(t,r,not){var last;while(t&&t!=last){last=t;var p=jQuery.parse,m;for(var 
i=0;p[i];i++){m=p[i].exec(t);if(m){t=t.substring(m[0].length);m[2]=m[2].replace(/\\/g,"");break;}}if(!m)break;if(m[1]==":"&&m[2]=="not")r=isSimple.test(m[3])?jQuery.filter(m[3],r,true).r:jQuery(r).not(m[3]);else if(m[1]==".")r=jQuery.classFilter(r,m[2],not);else if(m[1]=="["){var tmp=[],type=m[3];for(var i=0,rl=r.length;i<rl;i++){var a=r[i],z=a[jQuery.props[m[2]]||m[2]];if(z==null||/href|src|selected/.test(m[2]))z=jQuery.attr(a,m[2])||'';if((type==""&&!!z||type=="="&&z==m[5]||type=="!="&&z!=m[5]||type=="^="&&z&&!z.indexOf(m[5])||type=="$="&&z.substr(z.length-m[5].length)==m[5]||(type=="*="||type=="~=")&&z.indexOf(m[5])>=0)^not)tmp.push(a);}r=tmp;}else if(m[1]==":"&&m[2]=="nth-child"){var merge={},tmp=[],test=/(-?)(\d*)n((?:\+|-)?\d*)/.exec(m[3]=="even"&&"2n"||m[3]=="odd"&&"2n+1"||!/\D/.test(m[3])&&"0n+"+m[3]||m[3]),first=(test[1]+(test[2]||1))-0,last=test[3]-0;for(var i=0,rl=r.length;i<rl;i++){var node=r[i],parentNode=node.parentNode,id=jQuery.data(parentNode);if(!merge[id]){var c=1;for(var n=parentNode.firstChild;n;n=n.nextSibling)if(n.nodeType==1)n.nodeIndex=c++;merge[id]=true;}var add=false;if(first==0){if(node.nodeIndex==last)add=true;}else if((node.nodeIndex-last)%first==0&&(node.nodeIndex-last)/first>=0)add=true;if(add^not)tmp.push(node);}r=tmp;}else{var fn=jQuery.expr[m[1]];if(typeof fn=="object")fn=fn[m[2]];if(typeof fn=="string")fn=eval("false||function(a,i){return "+fn+";}");r=jQuery.grep(r,function(elem,i){return fn(elem,i,m,r);},not);}}return{r:r,t:t};},dir:function(elem,dir){var matched=[],cur=elem[dir];while(cur&&cur!=document){if(cur.nodeType==1)matched.push(cur);cur=cur[dir];}return matched;},nth:function(cur,result,dir,elem){result=result||1;var num=0;for(;cur;cur=cur[dir])if(cur.nodeType==1&&++num==result)break;return cur;},sibling:function(n,elem){var r=[];for(;n;n=n.nextSibling){if(n.nodeType==1&&n!=elem)r.push(n);}return r;}});jQuery.event={add:function(elem,types,handler,data){if(elem.nodeType==3||elem.nodeType==8)return;if(jQuery.browser.msie&&elem.setInterval)elem=window;if(!handler.guid)handler.guid=this.guid++;if(data!=undefined){var fn=handler;handler=this.proxy(fn,function(){return fn.apply(this,arguments);});handler.data=data;}var events=jQuery.data(elem,"events")||jQuery.data(elem,"events",{}),handle=jQuery.data(elem,"handle")||jQuery.data(elem,"handle",function(){if(typeof jQuery!="undefined"&&!jQuery.event.triggered)return jQuery.event.handle.apply(arguments.callee.elem,arguments);});handle.elem=elem;jQuery.each(types.split(/\s+/),function(index,type){var parts=type.split(".");type=parts[0];handler.type=parts[1];var handlers=events[type];if(!handlers){handlers=events[type]={};if(!jQuery.event.special[type]||jQuery.event.special[type].setup.call(elem)===false){if(elem.addEventListener)elem.addEventListener(type,handle,false);else if(elem.attachEvent)elem.attachEvent("on"+type,handle);}}handlers[handler.guid]=handler;jQuery.event.global[type]=true;});elem=null;},guid:1,global:{},remove:function(elem,types,handler){if(elem.nodeType==3||elem.nodeType==8)return;var events=jQuery.data(elem,"events"),ret,index;if(events){if(types==undefined||(typeof types=="string"&&types.charAt(0)=="."))for(var type in events)this.remove(elem,type+(types||""));else{if(types.type){handler=types.handler;types=types.type;}jQuery.each(types.split(/\s+/),function(index,type){var parts=type.split(".");type=parts[0];if(events[type]){if(handler)delete events[type][handler.guid];else +for(handler in events[type])if(!parts[1]||events[type][handler].type==parts[1])delete 
events[type][handler];for(ret in events[type])break;if(!ret){if(!jQuery.event.special[type]||jQuery.event.special[type].teardown.call(elem)===false){if(elem.removeEventListener)elem.removeEventListener(type,jQuery.data(elem,"handle"),false);else if(elem.detachEvent)elem.detachEvent("on"+type,jQuery.data(elem,"handle"));}ret=null;delete events[type];}}});}for(ret in events)break;if(!ret){var handle=jQuery.data(elem,"handle");if(handle)handle.elem=null;jQuery.removeData(elem,"events");jQuery.removeData(elem,"handle");}}},trigger:function(type,data,elem,donative,extra){data=jQuery.makeArray(data);if(type.indexOf("!")>=0){type=type.slice(0,-1);var exclusive=true;}if(!elem){if(this.global[type])jQuery("*").add([window,document]).trigger(type,data);}else{if(elem.nodeType==3||elem.nodeType==8)return undefined;var val,ret,fn=jQuery.isFunction(elem[type]||null),event=!data[0]||!data[0].preventDefault;if(event){data.unshift({type:type,target:elem,preventDefault:function(){},stopPropagation:function(){},timeStamp:now()});data[0][expando]=true;}data[0].type=type;if(exclusive)data[0].exclusive=true;var handle=jQuery.data(elem,"handle");if(handle)val=handle.apply(elem,data);if((!fn||(jQuery.nodeName(elem,'a')&&type=="click"))&&elem["on"+type]&&elem["on"+type].apply(elem,data)===false)val=false;if(event)data.shift();if(extra&&jQuery.isFunction(extra)){ret=extra.apply(elem,val==null?data:data.concat(val));if(ret!==undefined)val=ret;}if(fn&&donative!==false&&val!==false&&!(jQuery.nodeName(elem,'a')&&type=="click")){this.triggered=true;try{elem[type]();}catch(e){}}this.triggered=false;}return val;},handle:function(event){var val,ret,namespace,all,handlers;event=arguments[0]=jQuery.event.fix(event||window.event);namespace=event.type.split(".");event.type=namespace[0];namespace=namespace[1];all=!namespace&&!event.exclusive;handlers=(jQuery.data(this,"events")||{})[event.type];for(var j in handlers){var handler=handlers[j];if(all||handler.type==namespace){event.handler=handler;event.data=handler.data;ret=handler.apply(this,arguments);if(val!==false)val=ret;if(ret===false){event.preventDefault();event.stopPropagation();}}}return val;},fix:function(event){if(event[expando]==true)return event;var originalEvent=event;event={originalEvent:originalEvent};var props="altKey attrChange attrName bubbles button cancelable charCode clientX clientY ctrlKey currentTarget data detail eventPhase fromElement handler keyCode metaKey newValue originalTarget pageX pageY prevValue relatedNode relatedTarget screenX screenY shiftKey srcElement target timeStamp toElement type view wheelDelta which".split(" ");for(var i=props.length;i;i--)event[props[i]]=originalEvent[props[i]];event[expando]=true;event.preventDefault=function(){if(originalEvent.preventDefault)originalEvent.preventDefault();originalEvent.returnValue=false;};event.stopPropagation=function(){if(originalEvent.stopPropagation)originalEvent.stopPropagation();originalEvent.cancelBubble=true;};event.timeStamp=event.timeStamp||now();if(!event.target)event.target=event.srcElement||document;if(event.target.nodeType==3)event.target=event.target.parentNode;if(!event.relatedTarget&&event.fromElement)event.relatedTarget=event.fromElement==event.target?event.toElement:event.fromElement;if(event.pageX==null&&event.clientX!=null){var 
doc=document.documentElement,body=document.body;event.pageX=event.clientX+(doc&&doc.scrollLeft||body&&body.scrollLeft||0)-(doc.clientLeft||0);event.pageY=event.clientY+(doc&&doc.scrollTop||body&&body.scrollTop||0)-(doc.clientTop||0);}if(!event.which&&((event.charCode||event.charCode===0)?event.charCode:event.keyCode))event.which=event.charCode||event.keyCode;if(!event.metaKey&&event.ctrlKey)event.metaKey=event.ctrlKey;if(!event.which&&event.button)event.which=(event.button&1?1:(event.button&2?3:(event.button&4?2:0)));return event;},proxy:function(fn,proxy){proxy.guid=fn.guid=fn.guid||proxy.guid||this.guid++;return proxy;},special:{ready:{setup:function(){bindReady();return;},teardown:function(){return;}},mouseenter:{setup:function(){if(jQuery.browser.msie)return false;jQuery(this).bind("mouseover",jQuery.event.special.mouseenter.handler);return true;},teardown:function(){if(jQuery.browser.msie)return false;jQuery(this).unbind("mouseover",jQuery.event.special.mouseenter.handler);return true;},handler:function(event){if(withinElement(event,this))return true;event.type="mouseenter";return jQuery.event.handle.apply(this,arguments);}},mouseleave:{setup:function(){if(jQuery.browser.msie)return false;jQuery(this).bind("mouseout",jQuery.event.special.mouseleave.handler);return true;},teardown:function(){if(jQuery.browser.msie)return false;jQuery(this).unbind("mouseout",jQuery.event.special.mouseleave.handler);return true;},handler:function(event){if(withinElement(event,this))return true;event.type="mouseleave";return jQuery.event.handle.apply(this,arguments);}}}};jQuery.fn.extend({bind:function(type,data,fn){return type=="unload"?this.one(type,data,fn):this.each(function(){jQuery.event.add(this,type,fn||data,fn&&data);});},one:function(type,data,fn){var one=jQuery.event.proxy(fn||data,function(event){jQuery(this).unbind(event,one);return(fn||data).apply(this,arguments);});return this.each(function(){jQuery.event.add(this,type,one,fn&&data);});},unbind:function(type,fn){return this.each(function(){jQuery.event.remove(this,type,fn);});},trigger:function(type,data,fn){return this.each(function(){jQuery.event.trigger(type,data,this,true,fn);});},triggerHandler:function(type,data,fn){return this[0]&&jQuery.event.trigger(type,data,this[0],false,fn);},toggle:function(fn){var args=arguments,i=1;while(i<args.length)jQuery.event.proxy(fn,args[i++]);return this.click(jQuery.event.proxy(fn,function(event){this.lastToggle=(this.lastToggle||0)%i;event.preventDefault();return args[this.lastToggle++].apply(this,arguments)||false;}));},hover:function(fnOver,fnOut){return this.bind('mouseenter',fnOver).bind('mouseleave',fnOut);},ready:function(fn){bindReady();if(jQuery.isReady)fn.call(document,jQuery);else +jQuery.readyList.push(function(){return fn.call(this,jQuery);});return this;}});jQuery.extend({isReady:false,readyList:[],ready:function(){if(!jQuery.isReady){jQuery.isReady=true;if(jQuery.readyList){jQuery.each(jQuery.readyList,function(){this.call(document);});jQuery.readyList=null;}jQuery(document).triggerHandler("ready");}}});var readyBound=false;function 
bindReady(){if(readyBound)return;readyBound=true;if(document.addEventListener&&!jQuery.browser.opera)document.addEventListener("DOMContentLoaded",jQuery.ready,false);if(jQuery.browser.msie&&window==top)(function(){if(jQuery.isReady)return;try{document.documentElement.doScroll("left");}catch(error){setTimeout(arguments.callee,0);return;}jQuery.ready();})();if(jQuery.browser.opera)document.addEventListener("DOMContentLoaded",function(){if(jQuery.isReady)return;for(var i=0;i<document.styleSheets.length;i++)if(document.styleSheets[i].disabled){setTimeout(arguments.callee,0);return;}jQuery.ready();},false);if(jQuery.browser.safari){var numStyles;(function(){if(jQuery.isReady)return;if(document.readyState!="loaded"&&document.readyState!="complete"){setTimeout(arguments.callee,0);return;}if(numStyles===undefined)numStyles=jQuery("style, link[rel=stylesheet]").length;if(document.styleSheets.length!=numStyles){setTimeout(arguments.callee,0);return;}jQuery.ready();})();}jQuery.event.add(window,"load",jQuery.ready);}jQuery.each(("blur,focus,load,resize,scroll,unload,click,dblclick,"+"mousedown,mouseup,mousemove,mouseover,mouseout,change,select,"+"submit,keydown,keypress,keyup,error").split(","),function(i,name){jQuery.fn[name]=function(fn){return fn?this.bind(name,fn):this.trigger(name);};});var withinElement=function(event,elem){var parent=event.relatedTarget;while(parent&&parent!=elem)try{parent=parent.parentNode;}catch(error){parent=elem;}return parent==elem;};jQuery(window).bind("unload",function(){jQuery("*").add(document).unbind();});jQuery.fn.extend({_load:jQuery.fn.load,load:function(url,params,callback){if(typeof url!='string')return this._load(url);var off=url.indexOf(" ");if(off>=0){var selector=url.slice(off,url.length);url=url.slice(0,off);}callback=callback||function(){};var type="GET";if(params)if(jQuery.isFunction(params)){callback=params;params=null;}else{params=jQuery.param(params);type="POST";}var self=this;jQuery.ajax({url:url,type:type,dataType:"html",data:params,complete:function(res,status){if(status=="success"||status=="notmodified")self.html(selector?jQuery("<div/>").append(res.responseText.replace(/<script(.|\s)*?\/script>/g,"")).find(selector):res.responseText);self.each(callback,[res.responseText,status,res]);}});return this;},serialize:function(){return jQuery.param(this.serializeArray());},serializeArray:function(){return this.map(function(){return jQuery.nodeName(this,"form")?jQuery.makeArray(this.elements):this;}).filter(function(){return this.name&&!this.disabled&&(this.checked||/select|textarea/i.test(this.nodeName)||/text|hidden|password/i.test(this.type));}).map(function(i,elem){var val=jQuery(this).val();return val==null?null:val.constructor==Array?jQuery.map(val,function(val,i){return{name:elem.name,value:val};}):{name:elem.name,value:val};}).get();}});jQuery.each("ajaxStart,ajaxStop,ajaxComplete,ajaxError,ajaxSuccess,ajaxSend".split(","),function(i,o){jQuery.fn[o]=function(f){return this.bind(o,f);};});var jsc=now();jQuery.extend({get:function(url,data,callback,type){if(jQuery.isFunction(data)){callback=data;data=null;}return jQuery.ajax({type:"GET",url:url,data:data,success:callback,dataType:type});},getScript:function(url,callback){return jQuery.get(url,null,callback,"script");},getJSON:function(url,data,callback){return jQuery.get(url,data,callback,"json");},post:function(url,data,callback,type){if(jQuery.isFunction(data)){callback=data;data={};}return 
jQuery.ajax({type:"POST",url:url,data:data,success:callback,dataType:type});},ajaxSetup:function(settings){jQuery.extend(jQuery.ajaxSettings,settings);},ajaxSettings:{url:location.href,global:true,type:"GET",timeout:0,contentType:"application/x-www-form-urlencoded",processData:true,async:true,data:null,username:null,password:null,accepts:{xml:"application/xml, text/xml",html:"text/html",script:"text/javascript, application/javascript",json:"application/json, text/javascript",text:"text/plain",_default:"*/*"}},lastModified:{},ajax:function(s){s=jQuery.extend(true,s,jQuery.extend(true,{},jQuery.ajaxSettings,s));var jsonp,jsre=/=\?(&|$)/g,status,data,type=s.type.toUpperCase();if(s.data&&s.processData&&typeof s.data!="string")s.data=jQuery.param(s.data);if(s.dataType=="jsonp"){if(type=="GET"){if(!s.url.match(jsre))s.url+=(s.url.match(/\?/)?"&":"?")+(s.jsonp||"callback")+"=?";}else if(!s.data||!s.data.match(jsre))s.data=(s.data?s.data+"&":"")+(s.jsonp||"callback")+"=?";s.dataType="json";}if(s.dataType=="json"&&(s.data&&s.data.match(jsre)||s.url.match(jsre))){jsonp="jsonp"+jsc++;if(s.data)s.data=(s.data+"").replace(jsre,"="+jsonp+"$1");s.url=s.url.replace(jsre,"="+jsonp+"$1");s.dataType="script";window[jsonp]=function(tmp){data=tmp;success();complete();window[jsonp]=undefined;try{delete window[jsonp];}catch(e){}if(head)head.removeChild(script);};}if(s.dataType=="script"&&s.cache==null)s.cache=false;if(s.cache===false&&type=="GET"){var ts=now();var ret=s.url.replace(/(\?|&)_=.*?(&|$)/,"$1_="+ts+"$2");s.url=ret+((ret==s.url)?(s.url.match(/\?/)?"&":"?")+"_="+ts:"");}if(s.data&&type=="GET"){s.url+=(s.url.match(/\?/)?"&":"?")+s.data;s.data=null;}if(s.global&&!jQuery.active++)jQuery.event.trigger("ajaxStart");var remote=/^(?:\w+:)?\/\/([^\/?#]+)/;if(s.dataType=="script"&&type=="GET"&&remote.test(s.url)&&remote.exec(s.url)[1]!=location.host){var head=document.getElementsByTagName("head")[0];var script=document.createElement("script");script.src=s.url;if(s.scriptCharset)script.charset=s.scriptCharset;if(!jsonp){var done=false;script.onload=script.onreadystatechange=function(){if(!done&&(!this.readyState||this.readyState=="loaded"||this.readyState=="complete")){done=true;success();complete();head.removeChild(script);}};}head.appendChild(script);return undefined;}var requestDone=false;var xhr=window.ActiveXObject?new ActiveXObject("Microsoft.XMLHTTP"):new XMLHttpRequest();if(s.username)xhr.open(type,s.url,s.async,s.username,s.password);else +xhr.open(type,s.url,s.async);try{if(s.data)xhr.setRequestHeader("Content-Type",s.contentType);if(s.ifModified)xhr.setRequestHeader("If-Modified-Since",jQuery.lastModified[s.url]||"Thu, 01 Jan 1970 00:00:00 GMT");xhr.setRequestHeader("X-Requested-With","XMLHttpRequest");xhr.setRequestHeader("Accept",s.dataType&&s.accepts[s.dataType]?s.accepts[s.dataType]+", */*":s.accepts._default);}catch(e){}if(s.beforeSend&&s.beforeSend(xhr,s)===false){s.global&&jQuery.active--;xhr.abort();return false;}if(s.global)jQuery.event.trigger("ajaxSend",[xhr,s]);var onreadystatechange=function(isTimeout){if(!requestDone&&xhr&&(xhr.readyState==4||isTimeout=="timeout")){requestDone=true;if(ival){clearInterval(ival);ival=null;}status=isTimeout=="timeout"&&"timeout"||!jQuery.httpSuccess(xhr)&&"error"||s.ifModified&&jQuery.httpNotModified(xhr,s.url)&&"notmodified"||"success";if(status=="success"){try{data=jQuery.httpData(xhr,s.dataType,s.dataFilter);}catch(e){status="parsererror";}}if(status=="success"){var 
modRes;try{modRes=xhr.getResponseHeader("Last-Modified");}catch(e){}if(s.ifModified&&modRes)jQuery.lastModified[s.url]=modRes;if(!jsonp)success();}else +jQuery.handleError(s,xhr,status);complete();if(s.async)xhr=null;}};if(s.async){var ival=setInterval(onreadystatechange,13);if(s.timeout>0)setTimeout(function(){if(xhr){xhr.abort();if(!requestDone)onreadystatechange("timeout");}},s.timeout);}try{xhr.send(s.data);}catch(e){jQuery.handleError(s,xhr,null,e);}if(!s.async)onreadystatechange();function success(){if(s.success)s.success(data,status);if(s.global)jQuery.event.trigger("ajaxSuccess",[xhr,s]);}function complete(){if(s.complete)s.complete(xhr,status);if(s.global)jQuery.event.trigger("ajaxComplete",[xhr,s]);if(s.global&&!--jQuery.active)jQuery.event.trigger("ajaxStop");}return xhr;},handleError:function(s,xhr,status,e){if(s.error)s.error(xhr,status,e);if(s.global)jQuery.event.trigger("ajaxError",[xhr,s,e]);},active:0,httpSuccess:function(xhr){try{return!xhr.status&&location.protocol=="file:"||(xhr.status>=200&&xhr.status<300)||xhr.status==304||xhr.status==1223||jQuery.browser.safari&&xhr.status==undefined;}catch(e){}return false;},httpNotModified:function(xhr,url){try{var xhrRes=xhr.getResponseHeader("Last-Modified");return xhr.status==304||xhrRes==jQuery.lastModified[url]||jQuery.browser.safari&&xhr.status==undefined;}catch(e){}return false;},httpData:function(xhr,type,filter){var ct=xhr.getResponseHeader("content-type"),xml=type=="xml"||!type&&ct&&ct.indexOf("xml")>=0,data=xml?xhr.responseXML:xhr.responseText;if(xml&&data.documentElement.tagName=="parsererror")throw"parsererror";if(filter)data=filter(data,type);if(type=="script")jQuery.globalEval(data);if(type=="json")data=eval("("+data+")");return data;},param:function(a){var s=[];if(a.constructor==Array||a.jquery)jQuery.each(a,function(){s.push(encodeURIComponent(this.name)+"="+encodeURIComponent(this.value));});else +for(var j in a)if(a[j]&&a[j].constructor==Array)jQuery.each(a[j],function(){s.push(encodeURIComponent(j)+"="+encodeURIComponent(this));});else +s.push(encodeURIComponent(j)+"="+encodeURIComponent(jQuery.isFunction(a[j])?a[j]():a[j]));return s.join("&").replace(/%20/g,"+");}});jQuery.fn.extend({show:function(speed,callback){return speed?this.animate({height:"show",width:"show",opacity:"show"},speed,callback):this.filter(":hidden").each(function(){this.style.display=this.oldblock||"";if(jQuery.css(this,"display")=="none"){var elem=jQuery("<"+this.tagName+" />").appendTo("body");this.style.display=elem.css("display");if(this.style.display=="none")this.style.display="block";elem.remove();}}).end();},hide:function(speed,callback){return speed?this.animate({height:"hide",width:"hide",opacity:"hide"},speed,callback):this.filter(":visible").each(function(){this.oldblock=this.oldblock||jQuery.css(this,"display");this.style.display="none";}).end();},_toggle:jQuery.fn.toggle,toggle:function(fn,fn2){return jQuery.isFunction(fn)&&jQuery.isFunction(fn2)?this._toggle.apply(this,arguments):fn?this.animate({height:"toggle",width:"toggle",opacity:"toggle"},fn,fn2):this.each(function(){jQuery(this)[jQuery(this).is(":hidden")?"show":"hide"]();});},slideDown:function(speed,callback){return this.animate({height:"show"},speed,callback);},slideUp:function(speed,callback){return this.animate({height:"hide"},speed,callback);},slideToggle:function(speed,callback){return this.animate({height:"toggle"},speed,callback);},fadeIn:function(speed,callback){return this.animate({opacity:"show"},speed,callback);},fadeOut:function(speed,callback){return 
this.animate({opacity:"hide"},speed,callback);},fadeTo:function(speed,to,callback){return this.animate({opacity:to},speed,callback);},animate:function(prop,speed,easing,callback){var optall=jQuery.speed(speed,easing,callback);return this[optall.queue===false?"each":"queue"](function(){if(this.nodeType!=1)return false;var opt=jQuery.extend({},optall),p,hidden=jQuery(this).is(":hidden"),self=this;for(p in prop){if(prop[p]=="hide"&&hidden||prop[p]=="show"&&!hidden)return opt.complete.call(this);if(p=="height"||p=="width"){opt.display=jQuery.css(this,"display");opt.overflow=this.style.overflow;}}if(opt.overflow!=null)this.style.overflow="hidden";opt.curAnim=jQuery.extend({},prop);jQuery.each(prop,function(name,val){var e=new jQuery.fx(self,opt,name);if(/toggle|show|hide/.test(val))e[val=="toggle"?hidden?"show":"hide":val](prop);else{var parts=val.toString().match(/^([+-]=)?([\d+-.]+)(.*)$/),start=e.cur(true)||0;if(parts){var end=parseFloat(parts[2]),unit=parts[3]||"px";if(unit!="px"){self.style[name]=(end||1)+unit;start=((end||1)/e.cur(true))*start;self.style[name]=start+unit;}if(parts[1])end=((parts[1]=="-="?-1:1)*end)+start;e.custom(start,end,unit);}else +e.custom(start,val,"");}});return true;});},queue:function(type,fn){if(jQuery.isFunction(type)||(type&&type.constructor==Array)){fn=type;type="fx";}if(!type||(typeof type=="string"&&!fn))return queue(this[0],type);return this.each(function(){if(fn.constructor==Array)queue(this,type,fn);else{queue(this,type).push(fn);if(queue(this,type).length==1)fn.call(this);}});},stop:function(clearQueue,gotoEnd){var timers=jQuery.timers;if(clearQueue)this.queue([]);this.each(function(){for(var i=timers.length-1;i>=0;i--)if(timers[i].elem==this){if(gotoEnd)timers[i](true);timers.splice(i,1);}});if(!gotoEnd)this.dequeue();return this;}});var queue=function(elem,type,array){if(elem){type=type||"fx";var q=jQuery.data(elem,type+"queue");if(!q||array)q=jQuery.data(elem,type+"queue",jQuery.makeArray(array));}return q;};jQuery.fn.dequeue=function(type){type=type||"fx";return this.each(function(){var q=queue(this,type);q.shift();if(q.length)q[0].call(this);});};jQuery.extend({speed:function(speed,easing,fn){var opt=speed&&speed.constructor==Object?speed:{complete:fn||!fn&&easing||jQuery.isFunction(speed)&&speed,duration:speed,easing:fn&&easing||easing&&easing.constructor!=Function&&easing};opt.duration=(opt.duration&&opt.duration.constructor==Number?opt.duration:jQuery.fx.speeds[opt.duration])||jQuery.fx.speeds.def;opt.old=opt.complete;opt.complete=function(){if(opt.queue!==false)jQuery(this).dequeue();if(jQuery.isFunction(opt.old))opt.old.call(this);};return opt;},easing:{linear:function(p,n,firstNum,diff){return firstNum+diff*p;},swing:function(p,n,firstNum,diff){return((-Math.cos(p*Math.PI)/2)+0.5)*diff+firstNum;}},timers:[],timerId:null,fx:function(elem,options,prop){this.options=options;this.elem=elem;this.prop=prop;if(!options.orig)options.orig={};}});jQuery.fx.prototype={update:function(){if(this.options.step)this.options.step.call(this.elem,this.now,this);(jQuery.fx.step[this.prop]||jQuery.fx.step._default)(this);if(this.prop=="height"||this.prop=="width")this.elem.style.display="block";},cur:function(force){if(this.elem[this.prop]!=null&&this.elem.style[this.prop]==null)return this.elem[this.prop];var r=parseFloat(jQuery.css(this.elem,this.prop,force));return 
r&&r>-10000?r:parseFloat(jQuery.curCSS(this.elem,this.prop))||0;},custom:function(from,to,unit){this.startTime=now();this.start=from;this.end=to;this.unit=unit||this.unit||"px";this.now=this.start;this.pos=this.state=0;this.update();var self=this;function t(gotoEnd){return self.step(gotoEnd);}t.elem=this.elem;jQuery.timers.push(t);if(jQuery.timerId==null){jQuery.timerId=setInterval(function(){var timers=jQuery.timers;for(var i=0;i<timers.length;i++)if(!timers[i]())timers.splice(i--,1);if(!timers.length){clearInterval(jQuery.timerId);jQuery.timerId=null;}},13);}},show:function(){this.options.orig[this.prop]=jQuery.attr(this.elem.style,this.prop);this.options.show=true;this.custom(0,this.cur());if(this.prop=="width"||this.prop=="height")this.elem.style[this.prop]="1px";jQuery(this.elem).show();},hide:function(){this.options.orig[this.prop]=jQuery.attr(this.elem.style,this.prop);this.options.hide=true;this.custom(this.cur(),0);},step:function(gotoEnd){var t=now();if(gotoEnd||t>this.options.duration+this.startTime){this.now=this.end;this.pos=this.state=1;this.update();this.options.curAnim[this.prop]=true;var done=true;for(var i in this.options.curAnim)if(this.options.curAnim[i]!==true)done=false;if(done){if(this.options.display!=null){this.elem.style.overflow=this.options.overflow;this.elem.style.display=this.options.display;if(jQuery.css(this.elem,"display")=="none")this.elem.style.display="block";}if(this.options.hide)this.elem.style.display="none";if(this.options.hide||this.options.show)for(var p in this.options.curAnim)jQuery.attr(this.elem.style,p,this.options.orig[p]);}if(done)this.options.complete.call(this.elem);return false;}else{var n=t-this.startTime;this.state=n/this.options.duration;this.pos=jQuery.easing[this.options.easing||(jQuery.easing.swing?"swing":"linear")](this.state,n,0,1,this.options.duration);this.now=this.start+((this.end-this.start)*this.pos);this.update();}return true;}};jQuery.extend(jQuery.fx,{speeds:{slow:600,fast:200,def:400},step:{scrollLeft:function(fx){fx.elem.scrollLeft=fx.now;},scrollTop:function(fx){fx.elem.scrollTop=fx.now;},opacity:function(fx){jQuery.attr(fx.elem.style,"opacity",fx.now);},_default:function(fx){fx.elem.style[fx.prop]=fx.now+fx.unit;}}});jQuery.fn.offset=function(){var left=0,top=0,elem=this[0],results;if(elem)with(jQuery.browser){var parent=elem.parentNode,offsetChild=elem,offsetParent=elem.offsetParent,doc=elem.ownerDocument,safari2=safari&&parseInt(version)<522&&!/adobeair/i.test(userAgent),css=jQuery.curCSS,fixed=css(elem,"position")=="fixed";if(elem.getBoundingClientRect){var 
box=elem.getBoundingClientRect();add(box.left+Math.max(doc.documentElement.scrollLeft,doc.body.scrollLeft),box.top+Math.max(doc.documentElement.scrollTop,doc.body.scrollTop));add(-doc.documentElement.clientLeft,-doc.documentElement.clientTop);}else{add(elem.offsetLeft,elem.offsetTop);while(offsetParent){add(offsetParent.offsetLeft,offsetParent.offsetTop);if(mozilla&&!/^t(able|d|h)$/i.test(offsetParent.tagName)||safari&&!safari2)border(offsetParent);if(!fixed&&css(offsetParent,"position")=="fixed")fixed=true;offsetChild=/^body$/i.test(offsetParent.tagName)?offsetChild:offsetParent;offsetParent=offsetParent.offsetParent;}while(parent&&parent.tagName&&!/^body|html$/i.test(parent.tagName)){if(!/^inline|table.*$/i.test(css(parent,"display")))add(-parent.scrollLeft,-parent.scrollTop);if(mozilla&&css(parent,"overflow")!="visible")border(parent);parent=parent.parentNode;}if((safari2&&(fixed||css(offsetChild,"position")=="absolute"))||(mozilla&&css(offsetChild,"position")!="absolute"))add(-doc.body.offsetLeft,-doc.body.offsetTop);if(fixed)add(Math.max(doc.documentElement.scrollLeft,doc.body.scrollLeft),Math.max(doc.documentElement.scrollTop,doc.body.scrollTop));}results={top:top,left:left};}function border(elem){add(jQuery.curCSS(elem,"borderLeftWidth",true),jQuery.curCSS(elem,"borderTopWidth",true));}function add(l,t){left+=parseInt(l,10)||0;top+=parseInt(t,10)||0;}return results;};jQuery.fn.extend({position:function(){var left=0,top=0,results;if(this[0]){var offsetParent=this.offsetParent(),offset=this.offset(),parentOffset=/^body|html$/i.test(offsetParent[0].tagName)?{top:0,left:0}:offsetParent.offset();offset.top-=num(this,'marginTop');offset.left-=num(this,'marginLeft');parentOffset.top+=num(offsetParent,'borderTopWidth');parentOffset.left+=num(offsetParent,'borderLeftWidth');results={top:offset.top-parentOffset.top,left:offset.left-parentOffset.left};}return results;},offsetParent:function(){var offsetParent=this[0].offsetParent;while(offsetParent&&(!/^body|html$/i.test(offsetParent.tagName)&&jQuery.css(offsetParent,'position')=='static'))offsetParent=offsetParent.offsetParent;return jQuery(offsetParent);}});jQuery.each(['Left','Top'],function(i,name){var method='scroll'+name;jQuery.fn[method]=function(val){if(!this[0])return;return val!=undefined?this.each(function(){this==window||this==document?window.scrollTo(!i?val:jQuery(window).scrollLeft(),i?val:jQuery(window).scrollTop()):this[method]=val;}):this[0]==window||this[0]==document?self[i?'pageYOffset':'pageXOffset']||jQuery.boxModel&&document.documentElement[method]||document.body[method]:this[0][method];};});jQuery.each(["Height","Width"],function(i,name){var tl=i?"Left":"Top",br=i?"Right":"Bottom";jQuery.fn["inner"+name]=function(){return this[name.toLowerCase()]()+num(this,"padding"+tl)+num(this,"padding"+br);};jQuery.fn["outer"+name]=function(margin){return this["inner"+name]()+num(this,"border"+tl+"Width")+num(this,"border"+br+"Width")+(margin?num(this,"margin"+tl)+num(this,"margin"+br):0);};});})(); \ No newline at end of file diff --git a/test_data/chrome_cache/f_000029 b/test_data/chrome_cache/f_000029 new file mode 100644 index 0000000..2b84bd6 Binary files /dev/null and b/test_data/chrome_cache/f_000029 differ diff --git a/test_data/chrome_cache/f_00002a b/test_data/chrome_cache/f_00002a new file mode 100644 index 0000000..7490f4c Binary files /dev/null and b/test_data/chrome_cache/f_00002a differ diff --git a/test_data/chrome_cache/f_00002b b/test_data/chrome_cache/f_00002b new file mode 100644 index 0000000..36df255 Binary 
files /dev/null and b/test_data/chrome_cache/f_00002b differ diff --git a/test_data/chrome_cache/f_00002c b/test_data/chrome_cache/f_00002c new file mode 100644 index 0000000..b7d914c Binary files /dev/null and b/test_data/chrome_cache/f_00002c differ diff --git a/test_data/chrome_cache/f_00002d b/test_data/chrome_cache/f_00002d new file mode 100644 index 0000000..9148872 Binary files /dev/null and b/test_data/chrome_cache/f_00002d differ diff --git a/test_data/chrome_cache/f_00002e b/test_data/chrome_cache/f_00002e new file mode 100644 index 0000000..28917d2 Binary files /dev/null and b/test_data/chrome_cache/f_00002e differ diff --git a/test_data/chrome_cache/f_00002f b/test_data/chrome_cache/f_00002f new file mode 100644 index 0000000..aef6976 Binary files /dev/null and b/test_data/chrome_cache/f_00002f differ diff --git a/test_data/chrome_cache/f_000030 b/test_data/chrome_cache/f_000030 new file mode 100644 index 0000000..05507d5 Binary files /dev/null and b/test_data/chrome_cache/f_000030 differ diff --git a/test_data/chrome_cache/f_000031 b/test_data/chrome_cache/f_000031 new file mode 100644 index 0000000..b3b0514 Binary files /dev/null and b/test_data/chrome_cache/f_000031 differ diff --git a/test_data/chrome_cache/f_000032 b/test_data/chrome_cache/f_000032 new file mode 100644 index 0000000..48864fb Binary files /dev/null and b/test_data/chrome_cache/f_000032 differ diff --git a/test_data/chrome_cache/f_000033 b/test_data/chrome_cache/f_000033 new file mode 100644 index 0000000..be5bf7d Binary files /dev/null and b/test_data/chrome_cache/f_000033 differ diff --git a/test_data/chrome_cache/f_000034 b/test_data/chrome_cache/f_000034 new file mode 100644 index 0000000..e36c4c7 Binary files /dev/null and b/test_data/chrome_cache/f_000034 differ diff --git a/test_data/chrome_cache/f_000035 b/test_data/chrome_cache/f_000035 new file mode 100644 index 0000000..17ca8df Binary files /dev/null and b/test_data/chrome_cache/f_000035 differ diff --git a/test_data/chrome_cache/f_000036 b/test_data/chrome_cache/f_000036 new file mode 100644 index 0000000..d32c907 Binary files /dev/null and b/test_data/chrome_cache/f_000036 differ diff --git a/test_data/chrome_cache/f_000037 b/test_data/chrome_cache/f_000037 new file mode 100644 index 0000000..634e4be Binary files /dev/null and b/test_data/chrome_cache/f_000037 differ diff --git a/test_data/chrome_cache/f_000038 b/test_data/chrome_cache/f_000038 new file mode 100644 index 0000000..b94711c Binary files /dev/null and b/test_data/chrome_cache/f_000038 differ diff --git a/test_data/chrome_cache/f_000039 b/test_data/chrome_cache/f_000039 new file mode 100644 index 0000000..7eb5f77 Binary files /dev/null and b/test_data/chrome_cache/f_000039 differ diff --git a/test_data/chrome_cache/f_00003a b/test_data/chrome_cache/f_00003a new file mode 100644 index 0000000..e4c4c4d Binary files /dev/null and b/test_data/chrome_cache/f_00003a differ diff --git a/test_data/chrome_cache/f_00003b b/test_data/chrome_cache/f_00003b new file mode 100644 index 0000000..9f1531c Binary files /dev/null and b/test_data/chrome_cache/f_00003b differ diff --git a/test_data/chrome_cache/f_00003c b/test_data/chrome_cache/f_00003c new file mode 100644 index 0000000..58a8bc2 Binary files /dev/null and b/test_data/chrome_cache/f_00003c differ diff --git a/test_data/chrome_cache/f_00003d b/test_data/chrome_cache/f_00003d new file mode 100644 index 0000000..51fadd4 Binary files /dev/null and b/test_data/chrome_cache/f_00003d differ diff --git a/test_data/chrome_cache/f_00003e 
b/test_data/chrome_cache/f_00003e new file mode 100644 index 0000000..14eeff9 Binary files /dev/null and b/test_data/chrome_cache/f_00003e differ diff --git a/test_data/chrome_cache/f_00003f b/test_data/chrome_cache/f_00003f new file mode 100644 index 0000000..0a96afb Binary files /dev/null and b/test_data/chrome_cache/f_00003f differ diff --git a/test_data/chrome_cache/f_000040 b/test_data/chrome_cache/f_000040 new file mode 100644 index 0000000..57a1386 Binary files /dev/null and b/test_data/chrome_cache/f_000040 differ diff --git a/test_data/chrome_cache/f_000041 b/test_data/chrome_cache/f_000041 new file mode 100644 index 0000000..54d3fe6 Binary files /dev/null and b/test_data/chrome_cache/f_000041 differ diff --git a/test_data/chrome_cache/f_000042 b/test_data/chrome_cache/f_000042 new file mode 100644 index 0000000..774f96b Binary files /dev/null and b/test_data/chrome_cache/f_000042 differ diff --git a/test_data/chrome_cache/f_000043 b/test_data/chrome_cache/f_000043 new file mode 100644 index 0000000..c2a6fa0 Binary files /dev/null and b/test_data/chrome_cache/f_000043 differ diff --git a/test_data/chrome_cache/f_000044 b/test_data/chrome_cache/f_000044 new file mode 100644 index 0000000..338bc55 Binary files /dev/null and b/test_data/chrome_cache/f_000044 differ diff --git a/test_data/chrome_cache/f_000045 b/test_data/chrome_cache/f_000045 new file mode 100644 index 0000000..50f88ba Binary files /dev/null and b/test_data/chrome_cache/f_000045 differ diff --git a/test_data/chrome_cache/f_000046 b/test_data/chrome_cache/f_000046 new file mode 100644 index 0000000..a91ed7a Binary files /dev/null and b/test_data/chrome_cache/f_000046 differ diff --git a/test_data/chrome_cache/f_000047 b/test_data/chrome_cache/f_000047 new file mode 100644 index 0000000..8f1b8fa Binary files /dev/null and b/test_data/chrome_cache/f_000047 differ diff --git a/test_data/chrome_cache/f_000048 b/test_data/chrome_cache/f_000048 new file mode 100644 index 0000000..14a28f6 Binary files /dev/null and b/test_data/chrome_cache/f_000048 differ diff --git a/test_data/chrome_cache/f_000049 b/test_data/chrome_cache/f_000049 new file mode 100644 index 0000000..1500656 Binary files /dev/null and b/test_data/chrome_cache/f_000049 differ diff --git a/test_data/chrome_cache/f_00004a b/test_data/chrome_cache/f_00004a new file mode 100644 index 0000000..b336fc1 Binary files /dev/null and b/test_data/chrome_cache/f_00004a differ diff --git a/test_data/chrome_cache/f_00004b b/test_data/chrome_cache/f_00004b new file mode 100644 index 0000000..b2ef8c9 Binary files /dev/null and b/test_data/chrome_cache/f_00004b differ diff --git a/test_data/chrome_cache/f_00004c b/test_data/chrome_cache/f_00004c new file mode 100644 index 0000000..f06281b Binary files /dev/null and b/test_data/chrome_cache/f_00004c differ diff --git a/test_data/chrome_cache/f_00004d b/test_data/chrome_cache/f_00004d new file mode 100644 index 0000000..12698f8 Binary files /dev/null and b/test_data/chrome_cache/f_00004d differ diff --git a/test_data/chrome_cache/index b/test_data/chrome_cache/index new file mode 100644 index 0000000..1e352d1 Binary files /dev/null and b/test_data/chrome_cache/index differ diff --git a/test_data/chrome_extensions/apdfllckaahabafndbhieahigkjlhalf b/test_data/chrome_extensions/apdfllckaahabafndbhieahigkjlhalf new file mode 100644 index 0000000..bb6e407 --- /dev/null +++ b/test_data/chrome_extensions/apdfllckaahabafndbhieahigkjlhalf @@ -0,0 +1,433 @@ +<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="en" 
><head><title>Chrome Web Store - Google Drive
\ No newline at end of file diff --git a/test_data/chrome_extensions/blpcfgokakmgnkcojhhkbfbldkacnbeo b/test_data/chrome_extensions/blpcfgokakmgnkcojhhkbfbldkacnbeo new file mode 100644 index 0000000..9cca9a8 --- /dev/null +++ b/test_data/chrome_extensions/blpcfgokakmgnkcojhhkbfbldkacnbeo @@ -0,0 +1,444 @@ +Chrome Web Store - YouTube
\ No newline at end of file diff --git a/test_data/chrome_extensions/hmjkmjkepdijhoojdojkdfohbdgmmhki b/test_data/chrome_extensions/hmjkmjkepdijhoojdojkdfohbdgmmhki new file mode 100644 index 0000000..6c8b833 --- /dev/null +++ b/test_data/chrome_extensions/hmjkmjkepdijhoojdojkdfohbdgmmhki @@ -0,0 +1,446 @@ +Chrome Web Store - Google Keep - notes and lists
\ No newline at end of file diff --git a/test_data/chrome_extensions/icppfcnhkcmnfdhfhphakoifcfokfdhg b/test_data/chrome_extensions/icppfcnhkcmnfdhfhphakoifcfokfdhg new file mode 100644 index 0000000..6cee478 --- /dev/null +++ b/test_data/chrome_extensions/icppfcnhkcmnfdhfhphakoifcfokfdhg @@ -0,0 +1,429 @@ +Chrome Web Store - Google Play Music
\ No newline at end of file diff --git a/test_data/chrome_extensions/pjkljhegncpnkpknbcohdijeoejaedia b/test_data/chrome_extensions/pjkljhegncpnkpknbcohdijeoejaedia new file mode 100644 index 0000000..a8ca05d --- /dev/null +++ b/test_data/chrome_extensions/pjkljhegncpnkpknbcohdijeoejaedia @@ -0,0 +1,426 @@ +Chrome Web Store - Gmail
\ No newline at end of file diff --git a/test_data/com.apple.HIToolbox.plist b/test_data/com.apple.HIToolbox.plist new file mode 100644 index 0000000..55da3ad Binary files /dev/null and b/test_data/com.apple.HIToolbox.plist differ diff --git a/test_data/com.apple.SoftwareUpdate.plist b/test_data/com.apple.SoftwareUpdate.plist new file mode 100644 index 0000000..e32bafb Binary files /dev/null and b/test_data/com.apple.SoftwareUpdate.plist differ diff --git a/test_data/com.apple.TimeMachine.plist b/test_data/com.apple.TimeMachine.plist new file mode 100644 index 0000000..1970944 Binary files /dev/null and b/test_data/com.apple.TimeMachine.plist differ diff --git a/test_data/com.apple.airport.preferences.plist b/test_data/com.apple.airport.preferences.plist new file mode 100644 index 0000000..3d912ff --- /dev/null +++ b/test_data/com.apple.airport.preferences.plist @@ -0,0 +1,397 @@ + + + + + RememberedNetworks + + + AutoLogin + + CachedScanRecord + + 80211D_IE + + IE_KEY_80211D_CHAN_INFO_ARRAY + + + IE_KEY_80211D_FIRST_CHANNEL + 1 + IE_KEY_80211D_MAX_POWER + 16 + IE_KEY_80211D_NUM_CHANNELS + 11 + + + IE_KEY_80211D_COUNTRY_CODE + US + + AGE + 0 + AP_MODE + 2 + BEACON_INT + 100 + BSSID + 0:1d:d2:b8:6e:40 + CAPABILITIES + 3089 + CHANNEL + 6 + CHANNEL_FLAGS + 10 + HT_CAPS_IE + + AMPDU_PARAMS + 23 + ASEL_CAPS + 0 + CAPS + 12 + EXT_CAPS + 0 + MCS_SET + //8AAAAAAAAAAAAAAAAAAA== + TXBF_CAPS + 0 + + HT_IE + + HT_BASIC_MCS_SET + AAAAAAAAAAAAAAAAAAAAAA== + HT_DUAL_BEACON + + HT_DUAL_CTS_PROT + + HT_LSIG_TXOP_PROT_FULL + + HT_NON_GF_STAS_PRESENT + + HT_OBSS_NON_HT_STAS_PRESENT + + HT_OP_MODE + 1 + HT_PCO_ACTIVE + + HT_PCO_PHASE + + HT_PRIMARY_CHAN + 6 + HT_PSMP_STAS_ONLY + + HT_RIFS_MODE + + HT_SECONDARY_BEACON + + HT_SECONDARY_CHAN_OFFSET + 0 + HT_SERVICE_INT + 0 + HT_STA_CHAN_WIDTH + + HT_TX_BURST_LIMIT + + + IE + AAZldXJvcGEBCIKEi5YSJEhsAwEGKgEEMgQMGDBgLRoMABf//wAAAAAAAAAAAAAAAAAAAAAAAAAAAD0WBgAFAAAAAAAAAAAAAAAAAAAAAAAAAD4BAN0aAFDyAQEAAFDyAgIAAFDyAgBQ8gQBAABQ8gIwGAEAAA+sAgIAAA+sAgAPrAQBAAAPrAIAAN0YAFDyAgEBgAADpAAAJ6QAAEJDXgBiMi8ACwUCAGASen8BAd0HAAxDAwAAAAcGVVMgAQsQ3YcAUPIEEEoAARAQRAABAhA7AAEDEEcAECiAKIAogBiAqIAAHdK4bkAQIQAFQVJSSVMQIwAGVEc4NjJHECQABlJUMjg2MBBCAAgxMjM0NTY3OBBUAAgABgBQ8gQAARARABJBUlJJUyBURzg2MiBSb3V0ZXIQCAACIQwQPAABARBJAAYANyoAASA= + NOISE + 0 + RATES + + 1 + 2 + 5 + 11 + 9 + 18 + 36 + 54 + 6 + 12 + 24 + 48 + + RSN_IE + + IE_KEY_RSN_AUTHSELS + + 2 + + IE_KEY_RSN_MCIPHER + 2 + IE_KEY_RSN_UCIPHERS + + 2 + 4 + + IE_KEY_RSN_VERSION + 1 + + RSSI + -50 + SCAN_DIRECTED + + SSID + ZXVyb3Bh + SSID_STR + europa + WPA_IE + + IE_KEY_WPA_AUTHSELS + + 2 + + IE_KEY_WPA_MCIPHER + 2 + IE_KEY_WPA_UCIPHERS + + 2 + 4 + + IE_KEY_WPA_VERSION + 1 + + WPS_PROB_RESP_IE + + IE_KEY_WPS_CFG_METHODS + 8460 + IE_KEY_WPS_DEV_NAME + ARRIS TG862 Router + IE_KEY_WPS_DEV_NAME_DATA + QVJSSVMgVEc4NjIgUm91dGVy + IE_KEY_WPS_MANUFACTURER + ARRIS + IE_KEY_WPS_MODEL_NAME + TG862G + IE_KEY_WPS_MODEL_NUM + RT2860 + IE_KEY_WPS_PRIMARY_DEV_TYPE + + WPS_DEV_TYPE_CAT + 6 + WPS_DEV_TYPE_OUI + AFDyBA== + WPS_DEV_TYPE_SUB_CAT + 1 + + IE_KEY_WPS_RESP_TYPE + 3 + IE_KEY_WPS_RF_BANDS + 1 + IE_KEY_WPS_SC_STATE + 2 + IE_KEY_WPS_SERIAL_NUM + 12345678 + IE_KEY_WPS_UUID_E + KIAogCiAGICogAAd0rhuQA== + + + Captive + + Closed + + Disabled + + LastConnected + 2013-07-30T00:29:26Z + Passpoint + + PossiblyHiddenNetwork + + SPRoaming + + SSID + ZXVyb3Bh + SSIDString + europa + SecurityType + WPA/WPA2 Personal + SystemMode + + TemporarilyDisabled + + + + AutoLogin + + CachedScanRecord + + AGE + 0 + AP_MODE + 2 + BEACON_INT + 100 + BSSID + 
0:24:6c:26:a4:1 + CAPABILITIES + 1057 + CHANNEL + 11 + CHANNEL_FLAGS + 10 + HT_CAPS_IE + + AMPDU_PARAMS + 27 + ASEL_CAPS + 0 + CAPS + 4556 + EXT_CAPS + 0 + MCS_SET + //8AAAAAAAAAAAAAAAAAAA== + TXBF_CAPS + 0 + + HT_IE + + HT_BASIC_MCS_SET + /wAAAAAAAAAAAAAAAAAAAA== + HT_DUAL_BEACON + + HT_DUAL_CTS_PROT + + HT_LSIG_TXOP_PROT_FULL + + HT_NON_GF_STAS_PRESENT + + HT_OBSS_NON_HT_STAS_PRESENT + + HT_OP_MODE + 1 + HT_PCO_ACTIVE + + HT_PCO_PHASE + + HT_PRIMARY_CHAN + 11 + HT_PSMP_STAS_ONLY + + HT_RIFS_MODE + + HT_SECONDARY_BEACON + + HT_SECONDARY_CHAN_OFFSET + 0 + HT_SERVICE_INT + 0 + HT_STA_CHAN_WIDTH + + HT_TX_BURST_LIMIT + + + IE + AAlDYW1wdXNOZXQBCIKECwwSFhgkAwELKgEAMgQwSGBsLRrMERv//wAAAAAAAAAAAAAAAAAAAAAAAAAAAD0WCwAZAAAA/wAAAAAAAAAAAAAAAAAAAN0eAJBMM8wRG///AAAAAAAAAAAAAAAAAAAAAAAAAAAA3RoAkEw0CwAZAAAA/wAAAAAAAAAAAAAAAAAAAN0YAFDyAgEBgAADpAAAJ6QAAEJDXgBiMi8A3QoAA38EAQAAAAAA + NOISE + 0 + RATES + + 1 + 2 + 5 + 6 + 9 + 11 + 12 + 18 + 24 + 36 + 48 + 54 + + RSSI + -41 + SCAN_DIRECTED + + SSID + Q2FtcHVzTmV0 + SSID_STR + CampusNet + + Captive + + Closed + + Disabled + + LastConnected + 2013-12-12T19:03:04Z + Passpoint + + PossiblyHiddenNetwork + + SPRoaming + + SSID + Q2FtcHVzTmV0 + SSIDString + CampusNet + SecurityType + Open + SystemMode + + TemporarilyDisabled + + + + AutoLogin + + Captive + + Closed + + Disabled + + LastConnected + 2013-12-13T15:45:46Z + Passpoint + + PossiblyHiddenNetwork + + SPRoaming + + SSID + Qm9zZSBTb3VuZExpbmsgQWlyIE5ldHdvcms= + SSIDString + Bose SoundLink Air Network + SecurityType + Open + SystemMode + + TemporarilyDisabled + + + + AutoLogin + + Captive + + Closed + + Disabled + + LastConnected + 2013-12-13T16:05:47Z + Passpoint + + PossiblyHiddenNetwork + + SPRoaming + + SSID + Qm9pbmdvIEhvdHNwb3Q= + SSIDString + Boingo Hotspot + SecurityType + Open + SystemMode + + TemporarilyDisabled + + + + Version + 14 + + diff --git a/test_data/com.apple.coreservices.appleidauthenticationinfo.ABC0ABC1-ABC0-ABC0-ABC0-ABC0ABC1ABC2.plist b/test_data/com.apple.coreservices.appleidauthenticationinfo.ABC0ABC1-ABC0-ABC0-ABC0-ABC0ABC1ABC2.plist new file mode 100644 index 0000000..ea0ac4c Binary files /dev/null and b/test_data/com.apple.coreservices.appleidauthenticationinfo.ABC0ABC1-ABC0-ABC0-ABC0-ABC0ABC1ABC2.plist differ diff --git a/test_data/com.apple.iPod.plist b/test_data/com.apple.iPod.plist new file mode 100644 index 0000000..c394bd1 --- /dev/null +++ b/test_data/com.apple.iPod.plist @@ -0,0 +1,111 @@ + + + + + Devices + + 0000A11300000000 + + Connected + 1995-11-22T18:25:07Z + Device Class + iPad + Family ID + 10006 + Firmware Version + 256 + Firmware Version String + 5.0.1 + ID + 0000A11300000000 + Region Info + C/A + Serial Number + A009A113Q00 + Updater Family ID + 10006 + Use Count + 5 + + 0DEADBEEF00F0A20 + + Connected + 2014-04-30T17:50:22Z + Device Class + iPhone + Family ID + 10035 + Firmware Version + 256 + Firmware Version String + 7.0.3 + ID + 0DEADBEEF00F0A20 + IMEI + 005550123044440 + Region Info + LL/A + Serial Number + ADALTHING012 + Updater Family ID + 10035 + Use Count + 18 + + 00000000012E4737 + + Connected + 2011-10-03T08:14:21Z + Device Class + iPad + Family ID + 10023 + Firmware Version + 256 + Firmware Version String + 7.0 + ID + 00000000012E4737 + IMEI + 01133557799BBDD + Region Info + LL/A + Serial Number + RJUPNASALIR12 + Updater Family ID + 10023 + Use Count + 7 + + 4C6F6F6E65000000 + + Connected + 2013-10-09T19:27:54Z + Device Class + iPhone + Family ID + 10016 + Firmware Version + 256 + Firmware Version String + 7.0 + ID + 4C6F6F6E65000000 + IMEI + 
012345678901234 + Region Info + LL/A + Serial Number + 526F676572 + Updater Family ID + 10016 + Use Count + 1 + + + com.apple.PreferenceSync.ExcludeAllSyncKeys + + abcdefgh + + + diff --git a/test_data/com.apple.spotlight.plist b/test_data/com.apple.spotlight.plist new file mode 100644 index 0000000..0666b23 Binary files /dev/null and b/test_data/com.apple.spotlight.plist differ diff --git a/test_data/contacts2.db b/test_data/contacts2.db new file mode 100644 index 0000000..dcf6747 Binary files /dev/null and b/test_data/contacts2.db differ diff --git a/test_data/cookies.db b/test_data/cookies.db new file mode 100644 index 0000000..866932d Binary files /dev/null and b/test_data/cookies.db differ diff --git a/test_data/document_versions.sql b/test_data/document_versions.sql new file mode 100644 index 0000000..5c6c950 Binary files /dev/null and b/test_data/document_versions.sql differ diff --git a/test_data/downloads.sqlite b/test_data/downloads.sqlite new file mode 100644 index 0000000..da950b8 Binary files /dev/null and b/test_data/downloads.sqlite differ diff --git a/test_data/empty_file b/test_data/empty_file new file mode 100644 index 0000000..e69de29 diff --git a/test_data/example.lnk b/test_data/example.lnk new file mode 100644 index 0000000..e5316d7 Binary files /dev/null and b/test_data/example.lnk differ diff --git a/test_data/firefox_cache/firefox28/E8D65m01 b/test_data/firefox_cache/firefox28/E8D65m01 new file mode 100644 index 0000000..418c630 Binary files /dev/null and b/test_data/firefox_cache/firefox28/E8D65m01 differ diff --git a/test_data/firefox_cache/firefox28/_CACHE_001_ b/test_data/firefox_cache/firefox28/_CACHE_001_ new file mode 100644 index 0000000..c7762c1 Binary files /dev/null and b/test_data/firefox_cache/firefox28/_CACHE_001_ differ diff --git a/test_data/firefox_cache/firefox28/_CACHE_002_ b/test_data/firefox_cache/firefox28/_CACHE_002_ new file mode 100644 index 0000000..2a04f07 Binary files /dev/null and b/test_data/firefox_cache/firefox28/_CACHE_002_ differ diff --git a/test_data/firefox_cache/firefox28/_CACHE_003_ b/test_data/firefox_cache/firefox28/_CACHE_003_ new file mode 100644 index 0000000..418c630 Binary files /dev/null and b/test_data/firefox_cache/firefox28/_CACHE_003_ differ diff --git a/test_data/firefox_cache/firefox3/_CACHE_001_ b/test_data/firefox_cache/firefox3/_CACHE_001_ new file mode 100644 index 0000000..798d443 Binary files /dev/null and b/test_data/firefox_cache/firefox3/_CACHE_001_ differ diff --git a/test_data/firefox_cache/firefox3/_CACHE_002_ b/test_data/firefox_cache/firefox3/_CACHE_002_ new file mode 100644 index 0000000..946716e Binary files /dev/null and b/test_data/firefox_cache/firefox3/_CACHE_002_ differ diff --git a/test_data/firefox_cache/firefox3/_CACHE_003_ b/test_data/firefox_cache/firefox3/_CACHE_003_ new file mode 100644 index 0000000..e5ac2e1 Binary files /dev/null and b/test_data/firefox_cache/firefox3/_CACHE_003_ differ diff --git a/test_data/firefox_cache/invalid_file b/test_data/firefox_cache/invalid_file new file mode 100644 index 0000000..11fd7da --- /dev/null +++ b/test_data/firefox_cache/invalid_file @@ -0,0 +1 @@ +I am not a Firefox cache file. 
\ No newline at end of file diff --git a/test_data/firefox_cookies.sqlite b/test_data/firefox_cookies.sqlite new file mode 100644 index 0000000..b85fbc9 Binary files /dev/null and b/test_data/firefox_cookies.sqlite differ diff --git a/test_data/firewall.log b/test_data/firewall.log new file mode 100644 index 0000000..0ec8c46 --- /dev/null +++ b/test_data/firewall.log @@ -0,0 +1,19 @@ +#Version: 1.5 +#Software: Microsoft Windows Firewall +#Time Format: Local +#Fields: date time action protocol src-ip dst-ip src-port dst-port size tcpflags tcpsyn tcpack tcpwin icmptype icmpcode info path +2005-04-11 08:05:57 DROP UDP 123.45.78.90 123.156.78.255 137 137 78 - - - - - - - RECEIVE +2005-04-11 08:05:57 DROP UDP 123.45.78.90 255.255.255.255 1631 2234 37 - - - - - - - RECEIVE +2005-04-11 08:05:58 OPEN UDP 123.45.78.90 123.156.78.90 500 500 - - - - - - - - - +2005-04-11 08:05:58 DROP UDP 123.45.78.90 123.156.78.255 138 138 299 - - - - - - - RECEIVE +2005-04-11 08:06:02 CLOSE UDP 123.45.78.90 123.156.78.90 1027 53 - - - - - - - - - +2005-04-11 08:06:02 CLOSE UDP 123.45.78.90 123.156.78.90 137 137 - - - - - - - - - +2005-04-11 08:06:05 DROP UDP 0.0.0.0 255.255.255.255 68 67 328 - - - - - - - RECEIVE +2005-04-11 08:06:26 DROP TCP 123.45.78.90 123.156.78.90 80 1774 576 A 123456789 987654321 12345 - - - RECEIVE +2005-04-11 08:06:27 DROP TCP 123.45.78.90 123.156.78.90 80 1774 576 AP 123456789 987654321 12345 - - - RECEIVE +2005-04-11 08:08:58 DROP ICMP 123.45.78.90 123.156.78.90 7 7 78 - - - - 8 0 - RECEIVE +2005-04-11 08:09:29 OPEN TCP 123.45.78.90 123.156.78.90 1606 445 - - - - - - - - - +2005-04-11 08:09:30 CLOSE TCP 123.45.78.90 123.156.78.90 1607 139 - - - - - - - - - +2005-04-11 08:48:46 DROP TCP 123.45.78.90 123.156.78.90 80 1693 40 A 2351482979 694744025 64675 - - - RECEIVE +2005-04-11 08:48:46 DROP TCP 123.45.78.90 123.156.78.90 80 1693 40 FA 2351482979 694744025 64675 - - - RECEIVE +2005-04-11 08:52:26 INFO-EVENTS-LOST - - - - - - - - - - - - 59 - diff --git a/test_data/global_history.dat b/test_data/global_history.dat new file mode 100644 index 0000000..d0b7a2c --- /dev/null +++ b/test_data/global_history.dat @@ -0,0 +1,148 @@ +http://redir.opera.com/www.opera.com/firstrun/ +http://redir.opera.com/www.opera.com/firstrun/ +1384209918 +-1 +Welcome to Opera +http://www.opera.com/portal/startup/?fr=1 +1384209922 +-1 +http://www.mbl.is/mm/augl/counter/31371_37692.html +http://www.mbl.is/mm/augl/counter/31371_37692.html +1384209942 +-1 +https://www.facebook.com/connect/ping?client_id=367160293313398&domain=www.mbl.is&origin=1&redirect_uri=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D28%23cb%3Df171bc7081f98f8%26domain%3Dwww.mbl.is%26origin%3Dhttp%253A%252F%252Fwww.mbl.is%252Ff13c78fa277e75%26relation%3Dparent&response_type=token%2Csigned_request%2Ccode&sdk=joey +https://www.facebook.com/connect/ping?client_id=367160293313398&domain=www.mbl.is&origin=1&redirect_uri=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D28%23cb%3Df171bc7081f98f8%26domain%3Dwww.mbl.is%26origin%3Dhttp%253A%252F%252Fwww.mbl.is%252Ff13c78fa277e75%26relation%3Dparent&response_type=token%2Csigned_request%2Ccode&sdk=joey +1384209943 +-1 +Karl Bretaprins fær ellilífeyri - mbl.is +http://www.mbl.is/frettir/erlent/2013/11/11/karl_bretaprins_faer_ellilifeyri/ +1384209946 +-1 
+https://www.facebook.com/connect/ping?client_id=367160293313398&domain=www.mbl.is&origin=1&redirect_uri=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D28%23cb%3Df2708237ff9aab%26domain%3Dwww.mbl.is%26origin%3Dhttp%253A%252F%252Fwww.mbl.is%252Ff235143a319bc%26relation%3Dparent&response_type=token%2Csigned_request%2Ccode&sdk=joey +https://www.facebook.com/connect/ping?client_id=367160293313398&domain=www.mbl.is&origin=1&redirect_uri=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D28%23cb%3Df2708237ff9aab%26domain%3Dwww.mbl.is%26origin%3Dhttp%253A%252F%252Fwww.mbl.is%252Ff235143a319bc%26relation%3Dparent&response_type=token%2Csigned_request%2Ccode&sdk=joey +1384209948 +-1 +152 milljónir duga kónginum ekki - mbl.is +http://www.mbl.is/frettir/erlent/2013/11/07/152_milljonir_duga_konginum_ekki/ +1384209950 +-1 +http://www.mbl.is/mm/augl/counter/31393_37174.html +http://www.mbl.is/mm/augl/counter/31393_37174.html +1384209953 +-1 +https://www.facebook.com/connect/ping?client_id=367160293313398&domain=www.mbl.is&origin=1&redirect_uri=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D28%23cb%3Df285426b74aead%26domain%3Dwww.mbl.is%26origin%3Dhttp%253A%252F%252Fwww.mbl.is%252Ff1eee7e373226c%26relation%3Dparent&response_type=token%2Csigned_request%2Ccode&sdk=joey +https://www.facebook.com/connect/ping?client_id=367160293313398&domain=www.mbl.is&origin=1&redirect_uri=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D28%23cb%3Df285426b74aead%26domain%3Dwww.mbl.is%26origin%3Dhttp%253A%252F%252Fwww.mbl.is%252Ff1eee7e373226c%26relation%3Dparent&response_type=token%2Csigned_request%2Ccode&sdk=joey +1384209954 +-1 +http://www.mbl.is/mm/augl/counter/31476_37989.html +http://www.mbl.is/mm/augl/counter/31476_37989.html +1384209954 +-1 +Karl og Camilla blessuð við Ganges-fljót - mbl.is +http://www.mbl.is/frettir/erlent/2013/11/07/karl_og_camilla_blessud_vid_ganges_fljot/ +1384209955 +-1 +http://www.mbl.is/mm/augl/counter/30536_37692.html +http://www.mbl.is/mm/augl/counter/30536_37692.html +1384209960 +-1 +Fréttaknippi: Kóngafólk í fjölmiðlum - mbl.is +http://www.mbl.is/frettir/knippi/2990/ +1384209961 +-1 +http://theonion.com/ +http://theonion.com/ +1384209968 +-1 +http://www.google.com/pagead/drt/ui +http://www.google.com/pagead/drt/ui +1384209970 +-1 +The Onion - America's Finest News Source +http://www.theonion.com/ +1384209970 +-1 +10 Celebrities You Never Knew Were Abducted And Murdered By Andie MacDowell | The Onion - America's Finest News Source +http://www.theonion.com/articles/10-celebrities-you-never-knew-were-abducted-and-mu,34518/ +1384209976 +-1 +10 Celebrities You Never Knew Were Abducted And Murdered By Andie MacDowell | The Onion - America's Finest News Source +http://www.theonion.com/articles/10-celebrities-you-never-knew-were-abducted-and-mu,34518/#4 +1384209982 +-1 +10 Celebrities You Never Knew Were Abducted And Murdered By Andie MacDowell | The Onion - America's Finest News Source +http://www.theonion.com/articles/10-celebrities-you-never-knew-were-abducted-and-mu,34518/#10 +1384209988 +-1 +10 Celebrities You Never Knew Were Abducted And Murdered By Andie MacDowell | The Onion - America's Finest News Source +http://www.theonion.com/articles/10-celebrities-you-never-knew-were-abducted-and-mu,34518/#11 +1384209989 +-1 +http://code.google.com/p/plaso +http://code.google.com/p/plaso +1384209998 +-1 +plaso - Plaso Langar Að Safna Öllu - Google Project Hosting +http://code.google.com/p/plaso/ 
+1384209998 +-1 +Source Checkout - plaso - Plaso Langar Að Safna Öllu - Google Project Hosting +http://code.google.com/p/plaso/source/checkout +1384210005 +-1 +/ - plaso - Plaso Langar Að Safna Öllu - Google Project Hosting +http://code.google.com/p/plaso/source/browse/ +1384210007 +-1 +http://plaso.googlecode.com/git/plaso/proto/plaso_storage.proto +http://plaso.googlecode.com/git/plaso/proto/plaso_storage.proto +1384210021 +-1 +plaso_storage.proto - plaso - Plaso Langar Að Safna Öllu - Google Project Hosting +http://code.google.com/p/plaso/source/browse/plaso/proto/plaso_storage.proto +1384210048 +-1 +plaso - home of the super timeline +http://plaso.kiddaland.net/ +1384210094 +-1 +log2timeline - plaso - home of the super timeline +http://plaso.kiddaland.net/usage/log2timeline +1384210102 +-1 +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/ +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/ +1384210105 +-1 +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2 +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2 +1384210107 +-1 +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2/ +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2/ +1384210107 +-1 +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2/final +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2/final +1384210110 +-1 +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2/final/ +https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/1.0.2/final/ +1384210701 +2419778 +http://mbl.is/ +http://mbl.is/ +1384210706 +2419976 +http://www.mbl.is/mm/augl/counter/31230_34050.html +http://www.mbl.is/mm/augl/counter/31230_34050.html +1384210708 +-1 +http://www.mbl.is/mm/augl/counter/31465_37902.html +http://www.mbl.is/mm/augl/counter/31465_37902.html +1384210708 +2419976 +Fréttir - mbl.is +http://www.mbl.is/frettir/ +1384210709 +2419966 diff --git a/test_data/iis.log b/test_data/iis.log new file mode 100644 index 0000000..3d072d6 --- /dev/null +++ b/test_data/iis.log @@ -0,0 +1,15 @@ +#Software: Microsoft Internet Information Services 6.0 +#Version: 1.0 +#Date: 2013-07-30 00:00:00 +#Fields: date time s-sitename s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status +2013-07-30 00:00:00 SOME1234NAME 10.10.10.100 GET /some/image/path/something.jpg - 80 - 10.10.10.100 Mozilla/4.0+(compatible;+Win32;+WinHttp.WinHttpRequest.5) 200 0 0 +2013-07-30 00:00:03 SOME1234NAME 10.10.10.100 GET /some/image/path/something.htm - 80 - 22.22.22.200 Mozilla/5.0+(Macintosh;+Intel+Mac+OS+X+10_6_8)+AppleWebKit/534.57.2+(KHTML,+like+Gecko)+Version/5.1.7+Safari/534.57.2 404 0 0 +2013-07-30 00:00:03 SOME1234NAME 10.10.10.100 GET /some/image/path/something.css - 80 - 22.22.22.200 Mozilla/5.0+(Macintosh;+Intel+Mac+OS+X+10_6_8)+AppleWebKit/534.57.2+(KHTML,+like+Gecko)+Version/5.1.7+Safari/534.57.2 404 0 0 +2013-07-30 00:00:03 SOME1234NAME 10.10.10.100 GET /some/image/path/something.cfm - 80 - 22.22.22.200 Mozilla/5.0+(Macintosh;+Intel+Mac+OS+X+10_6_8)+AppleWebKit/534.57.2+(KHTML,+like+Gecko)+Version/5.1.7+Safari/534.57.2 404 0 0 +2013-07-30 00:00:03 SOME1234NAME 10.10.10.100 GET /some/path/something.jpg whichServer=WEBSOS2 80 - 10.10.10.100 CFSCHEDULE 200 0 0 +2013-07-30 00:00:05 SOME1234NAME 10.10.10.100 GET /some/image/path/something.jpg - 80 - 22.22.22.200 Mozilla/5.0+(iPhone;+CPU+iPhone+OS+6_0_1+like+Mac+OS+X)+AppleWebKit/536.26+(KHTML,+like+Gecko)+Mobile/10A523 200 0 0 +2013-07-30 00:00:05 
SOME1234NAME 10.10.10.100 GET /some/something.jpg - 80 - 22.22.22.200 Mozilla/5.0+(iPhone;+CPU+iPhone+OS+6_0_1+like+Mac+OS+X)+AppleWebKit/536.26+(KHTML,+like+Gecko)+Mobile/10A523 200 0 0 +2013-07-30 00:00:08 SOME1234NAME 10.10.10.100 GET /something.jpg - 80 - 22.22.22.200 - 200 0 0 +2013-07-30 00:00:08 SOME1234NAME 10.10.10.100 GET /some/image/path/something.jpg - 80 - 22.22.22.200 Mozilla/5.0+(Windows+NT+5.1;+rv:10.0)+Gecko/20100101+Firefox/10.0 200 0 0 +2013-07-30 00:00:12 SOME1234NAME 10.10.10.100 GET / - 80 - 22.22.22.200 Mozilla/5.0+(iPhone;+CPU+iPhone+OS+6_1_3+like+Mac+OS+X)+AppleWebKit/536.26+(KHTML,+like+Gecko)+Mobile/10B329 302 0 0 +2013-07-30 00:00:13 SOME1234NAME 10.10.10.100 POST /some/image/path/something.jpg requesttimeout=500 80 - 22.22.22.200 Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+6.1;+Trident/4.0;+SLCC2;+.NET+CLR+2.0.50727;+.NET+CLR+3.5.30729;+.NET+CLR+3.0.30729;+Media+Center+PC+6.0;+.NET+CLR+1.1.4322;+.NET4.0C;+.NET4.0E;+InfoPath.2) 302 0 0 \ No newline at end of file diff --git a/test_data/image-split.E01 b/test_data/image-split.E01 new file mode 100644 index 0000000..3c1ce67 Binary files /dev/null and b/test_data/image-split.E01 differ diff --git a/test_data/image-split.E02 b/test_data/image-split.E02 new file mode 100644 index 0000000..64ed207 Binary files /dev/null and b/test_data/image-split.E02 differ diff --git a/test_data/image.E01 b/test_data/image.E01 new file mode 100644 index 0000000..b792a54 Binary files /dev/null and b/test_data/image.E01 differ diff --git a/test_data/image.qcow2 b/test_data/image.qcow2 new file mode 100644 index 0000000..8a52c93 Binary files /dev/null and b/test_data/image.qcow2 differ diff --git a/test_data/image.vhd b/test_data/image.vhd new file mode 100644 index 0000000..678307d Binary files /dev/null and b/test_data/image.vhd differ diff --git a/test_data/image.vmdk b/test_data/image.vmdk new file mode 100644 index 0000000..ecc8bd6 Binary files /dev/null and b/test_data/image.vmdk differ diff --git a/test_data/index.dat b/test_data/index.dat new file mode 100644 index 0000000..6bb53e1 Binary files /dev/null and b/test_data/index.dat differ diff --git a/test_data/java.idx b/test_data/java.idx new file mode 100644 index 0000000..21c35b6 Binary files /dev/null and b/test_data/java.idx differ diff --git a/test_data/java_602.idx b/test_data/java_602.idx new file mode 100644 index 0000000..8dc8fe5 Binary files /dev/null and b/test_data/java_602.idx differ diff --git a/test_data/login.keychain b/test_data/login.keychain new file mode 100644 index 0000000..14abac2 Binary files /dev/null and b/test_data/login.keychain differ diff --git a/test_data/mac_cups_ipp b/test_data/mac_cups_ipp new file mode 100644 index 0000000..19f2f70 Binary files /dev/null and b/test_data/mac_cups_ipp differ diff --git a/test_data/mackeeper_cache.db b/test_data/mackeeper_cache.db new file mode 100644 index 0000000..9189c0b Binary files /dev/null and b/test_data/mackeeper_cache.db differ diff --git a/test_data/mactime.body b/test_data/mactime.body new file mode 100644 index 0000000..7cea5f1 --- /dev/null +++ b/test_data/mactime.body @@ -0,0 +1,17 @@ +0|/lost+found|11|d/drwx------|0|0|12288|1337961350|1337961350|1337961350|0 +0|/a_directory|12|d/drwxr-xr-x|151107|5000|1024|1337961564|1337961563|1337961563|0 +0|/a_directory/another_file|16|r/rrw-------|151107|5000|22|1337961583|1337961584|1337961585|0 +0|/a_directory/a_file|14|r/rrw-------|151107|5000|53|1337961554|1337961554|1337961554|0 +0|/a_directory/.another_file.swp (deleted)|0|r/----------|0|0|0|0|0|0|0 
+0|/a_directory/another_file~ (deleted)|0|r/----------|0|0|0|0|0|0|0 +0|/passwords.txt|15|r/rr--------|151107|5000|116|1337961653|1337961653|1337961663|0 +0|1235|15|r/rr--------|141104|5000|234|1337961653|1337961653|1337961663|0 +0|1235.134|15|r/rr--------|151101|5000|2345|1337920553|1337931613|1337951660|0 +0|False|15|r/rr--------|151101|5000|2345|1337920553|1337931613|1337951660|0 +0|0|15|r/rr--------|151101|5000|2345|1337920553|1337931613|1337951660|0 +0|/leyndardómarnir/ekki skoða/þínar skrár|15|r/rr--------|101104|5000|234|1337961603|1337061653|1337963913|0 +0|/passwords.txt~ (deleted)|0|r/----------|0|0|0|0|0|0|0 +0|/$OrphanFiles|17|d/d---------|0|0|0|0|0|0|0 +0|/$OrphanFiles/OrphanFile-13 (deleted)|13|-/rrw-------|20035|5000|0|1337961576|1337961574|1337961653|0 +0|D:/$Extend/$RmMetadata/$Txf|30-144-2|d/dr-xr-xr-x|0|0|48|1376484810|1376484810|1376484810|1376484810 +0|D:/$AttrDef|4-128-4|r/rr-xr-xr-x|48|0|2560|1376484808|1376484808|1376484808|1376484808 diff --git a/test_data/mmssms.db b/test_data/mmssms.db new file mode 100755 index 0000000..b7b13e6 Binary files /dev/null and b/test_data/mmssms.db differ diff --git a/test_data/nobody.plist b/test_data/nobody.plist new file mode 100644 index 0000000..a1eb0dc Binary files /dev/null and b/test_data/nobody.plist differ diff --git a/test_data/openbsm.bsm b/test_data/openbsm.bsm new file mode 100644 index 0000000..24ff9db Binary files /dev/null and b/test_data/openbsm.bsm differ diff --git a/test_data/places.sqlite b/test_data/places.sqlite new file mode 100644 index 0000000..3274804 Binary files /dev/null and b/test_data/places.sqlite differ diff --git a/test_data/places_new.sqlite b/test_data/places_new.sqlite new file mode 100644 index 0000000..09806be Binary files /dev/null and b/test_data/places_new.sqlite differ diff --git a/test_data/plist_binary b/test_data/plist_binary new file mode 100644 index 0000000..a2a8c74 Binary files /dev/null and b/test_data/plist_binary differ diff --git a/test_data/popcontest1.log b/test_data/popcontest1.log new file mode 100644 index 0000000..b92163f --- /dev/null +++ b/test_data/popcontest1.log @@ -0,0 +1,19 @@ +POPULARITY-CONTEST-0 TIME:1277185301 ID:12345678901234567890123456789012 ARCH:i386 POPCONVER:1.38 +1277192082 1270556742 at /usr/sbin/atd +1277192083 1271950917 python2.5-minimal /usr/lib/python2.5/lib-dynload/_struct.so +1275197180 1275197263 empathy /usr/bin/empathy +1277192082 1270556742 at /usr/sbin/atd +1272987776 1272987590 nessus /usr/bin/nessus +1273651113 1273651856 gnome-orca /usr/bin/orca +END-POPULARITY-CONTEST-0 TIME:1277185301 + +POPULARITY-CONTEST-1 TIME:1277185301 ID:12345678901234567890123456789012 ARCH:i386 POPCONVER:1.38 +1277192082 1270556742 plaso /super/cool/plasuz +0 1270556742 miss_atime /super/cool/kj +1270556742 0 miss_ctime /super/cool/plasuz +0 0 freepats +1273651113 1273651856 plaso /super/cool +END-POPULARITY-CONTEST-1 TIME:1277185301 + + + diff --git a/test_data/psort_test.out b/test_data/psort_test.out new file mode 100644 index 0000000..604d154 Binary files /dev/null and b/test_data/psort_test.out differ diff --git a/test_data/quarantine.db b/test_data/quarantine.db new file mode 100644 index 0000000..9940b58 Binary files /dev/null and b/test_data/quarantine.db differ diff --git a/test_data/security.log b/test_data/security.log new file mode 100644 index 0000000..381094e --- /dev/null +++ b/test_data/security.log @@ -0,0 +1,9 @@ +Feb 26 19:11:56 secd[1] [user{} ]: securityd_xpc_dictionary_handler EscrowSecurityAl[3273] DeviceInCircle Þetta ætti að virka líka, 
setja íslensku inn. +Dec 26 19:11:57 secd[11] [serverxpc{SOSCCThisDeviceIsInCircle} ]: securityd_xpc_dictionary_handler EscrowSecurityAl[3273] DeviceInCircle +Dec 26 19:11:58 secd[111] [user{} ]: securityd_xpc_dictionary_handler EscrowSecurityAl[3273] DeviceInCircle +Dec 26 19:11:59 secd[1111] [user{SOSCCThisDeviceIsInCircle} C0x7fff872fa482]: securityd_xpc_dictionary_handler EscrowSecurityAl[3273] DeviceInCircle +Dec 6 19:11:01 secd[1] [user{} ]: +Dec 6 19:11:02 secd[11111] [user{SOSCCThisDeviceIsInCircle} C0x7fff872fa482 F0x106080db0]: +Dec 31 23:59:59 secd[123] [user{} ]: Good byte old year :'( +Mar 1 00:00:01 secd[456] [user{} ]: Happy new year! +Dec 24 01:21:47 --- last message repeated 3 time --- diff --git a/test_data/selinux.log b/test_data/selinux.log new file mode 100644 index 0000000..6a92e00 --- /dev/null +++ b/test_data/selinux.log @@ -0,0 +1,10 @@ +type=LOGIN msg=audit(1337845201.174:94983): pid=25443 uid=0 old auid=4294967295 new auid=0 old ses=4294967295 new ses=1165 +type=WRONGDATE msg=audit(1337845201): missing milliseconds, should be skipped by parser +type=SHORTDATE msg=audit(1337845201.0:0): check rounding +type=EMPTYDATE msg=audit(): empty date, should be skipped by parser + +type= msg=audit(1337845333.174:94984): missing type value, should be skipped by parser +msg=audit(1337845201.174:94984): missing type param, should be skipped by parser +type=NOMSG msg=audit(1337845222.174:94984): +type=UNDER_SCORE msg=audit(1337845666.174:94984): pid=25444 uid=0 old auid=4294967295 new auid=54321 old ses=4294967295 new ses=1166 +type=UNKNOWN[1323] msg=audit(1389164020.991:2159): fd=6 flags=0x802 diff --git a/test_data/skydrive.log b/test_data/skydrive.log new file mode 100755 index 0000000..8840e21 --- /dev/null +++ b/test_data/skydrive.log @@ -0,0 +1,22 @@ +08-01-2013 21:22:28.999 global.cpp:626!logVersionInfo (DETAIL): 17.0.2011.0627 (Ship) +08-01-2013 21:22:29.702 localchanges.cpp:6117!handleLocalChanges (DETAIL): handling LC_PERSIST_SYNC_TOKEN 15, 2 queued changes +08-01-2013 21:22:29.702 localchanges.cpp:5959!handlePersistSyncToken (NORMAL): Persisting Sync Token for drive 1. +SyncToken = LM%3d12345678905670%3bID%3d1234567890E059C0!103%3bLR%3d12345678905623%3aEP%3d2 +08-01-2013 21:22:58.344 global.cpp:612!openErrorLog (DETAIL): Local time is : 08-01-2013 23:22:58.344 +08-01-2013 21:22:58.344 clientpolicysettings.cpp:124!ClientPolicySettings::IsRefreshNeeded (NORMAL): Must refresh because time since last refresh was 458289 seconds ago (Poll Interval: 0). +08-01-2013 21:22:58.812 user.cpp:145!User::GetServiceTicket (ERROR): Cannot get service ticket : 0x8004de40 +08-01-2013 21:22:58.812 storageserviceapi.cpp:1310!StorageServiceApi::GetClientPolicy (ERROR): Unable to get service ticket : 0x8004de40, waiting... 
+08-01-2013 21:28:39.286 filescanner.cpp:1945!sweepDriveContents (DETAIL): Found deleted file in sweep: genoa secret-logo.png RID(A12FCC3B7AF059C0!118), FSID(1970324837077388,3297664651) +08-01-2013 21:28:39.286 filescanner.cpp:1945!sweepDriveContents (DETAIL): Found deleted file in sweep: publicskydrivewordfile.rtf RID(A12FCC3B7AF059C0!110), FSID(844424930232805,3297664651) +08-01-2013 21:28:39.286 filescanner.cpp:4059!doFullScanWork (DETAIL): scan for lib 1 complete, took 0 ms +08-01-2013 21:28:39.286 core.cpp:2738!handleScanState (DETAIL): finished scanning drive 1, 8 changes +08-01-2013 21:28:46.555 syncserviceproxy.cpp:2209!SyncServiceProxy::StartChangeEnumeration (NORMAL): rootFolderID=A12FCC3B7AF059C0!103 syncToken=LM%3d12345678905670%3bID%3d1234567890E059C0!103%3bLR%3d12345678905623%3aEP%3d2 +08-01-2013 21:28:46.727 storageserviceapi.cpp:70!LogResponse (NORMAL): GET https://dm1.storage.live.com/MyData/LiveFolders?Filter=changes&InlineBlobs=false&MaxItemCount=50&SyncToken=LM%3d12345678905670%3bID%3d1234567890E059C0!103%3bLR%3d12345678905623%3aEP%3d2&View=SkyDriveSync status:200/0x0 retry:0 size:2903 X-MSNSERVER:DM1____1213219 +08-01-2013 21:28:46.727 syncserviceproxy.cpp:2329!SyncServiceProxy::OnDownloadedEntries (NORMAL): EndChangeEnum rootFolderID=A12FCC3B7AF059C0!103 http=200 ss=2 hMD=0 pA=0 syncToken=LM%3d12345678905670%3bID%3d1234567890E059C0!103%3bLR%3d12345678905623%3aEP%3d2 + +-> Tiramisu_recipe.pdf (Document) A12FCC3B7AF059C0!122 A12FCC3B7AF059C0!122.0 A12FCC3B7AF059C0!103 + +08-01-2013 21:28:46.742 uploadgate.cpp:670!UploadGate::NotifyUploadRemoved (NORMAL): Removing change A12FCC3B7AF059C0!122 'Tiramisu_recipe.pdf' +SyncToken = Not a sync token (àèìòù)! +This line should be skipped! +13-33-2013 21:28:46.742 invalid.cpp:666!timestamp (PLASO): invalid timestamp diff --git a/test_data/skydriveerr-unicode.log b/test_data/skydriveerr-unicode.log new file mode 100755 index 0000000..616a1ab --- /dev/null +++ b/test_data/skydriveerr-unicode.log @@ -0,0 +1,28 @@ +######Logging started. Version=17.0.2011.0627 StartSystemTime:2013-07-25-160323.291 StartLocalTime:2013-07-25-180323.291 PID=0x8f4 TID=0x718 ContinuedFrom= +07-25-13,16:03:24.649,13,a98,0,AUTH,authapi.cpp(280),0,0,ERR,Sign in failed : DRX_E_AUTH_NO_VALID_CREDENTIALS, +07-25-13,16:03:24.649,15,718,0,PAL,systeminformationhelper.cpp(661),0,002AF7F8,ERR,The registry key to block Remote Access is not found.,System Error Code=0x2 +07-25-13,16:04:02.669,48,a98,0,AUTH,oauthprofile.cpp(268),0,0,ERR,No node found named Passport-Jméno-člena, no user name available, +07-25-13,16:06:31.306,cb,5d4,0,WNS,wnpconnmanager.cpp(176),7a93608,0,ERR,Failed to resolve proxy name. not using proxy,proxyName=;hr=8004da0e +07-25-13,16:06:31.321,f8,5c4,0,P2P,relayedtransportfactory.cpp(72),7a7ea58,0,ERR,JUMPONFAILURE Failed: ,msg=Runtime::GetProxyName(serverName, proxyName, port);hr=8004da0e +07-25-13,16:06:32.351,194,884,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +07-25-13,16:06:33.755,238,a98,0,DFA,ranotificationmanager.cpp(383),0,02A36DC8,ERR,Succeeded Macro failed,hr=8004da0e +######Logging started. 
Version=17.0.2011.0627 StartSystemTime:2013-07-25-174828.105 StartLocalTime:2013-07-25-194828.105 PID=0x90c TID=0x910 ContinuedFrom= +07-25-13,17:48:47.582,9e,aa0,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +07-25-13,17:48:49.189,13c,970,0,PAL,statemachine.cpp(274),2c2d5b0,0,ERR,No such transition defined,owner=@02C5A384;from=Closed;to=Closed +07-26-13,11:18:11.778,137,9ec,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +######Logging started. Version=17.0.2011.0627 StartSystemTime:2013-07-26-112344.929 StartLocalTime:2013-07-26-132344.929 PID=0x95c TID=0x960 ContinuedFrom= +07-26-13,12:25:36.611,be,838,0,PAL,runtime.cpp(219),3041de0,0,ERR,JUMPONFAILURE Failed: ,msg=HttpPAL::GetProxies(wszServerTarget, proxyList);hr=8004da0e +07-26-13,14:54:55.842,e3,a44,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +07-26-13,12:59:25.172,174,a24,0,WNS,wnpconnmanager.cpp(268),651b70,0068CEB8,VRB,Sending command,data=CNT 1 CON 214 +Context: 48cc +Last-Msg-Id: 0 + +BND 3 CON 102 +Context: 48cc + +118542210062762168551983387045 +07-26-13,12:59:25.484,17e,a24,0,WNS,wnpnet.cpp(1060),651b70,03178DE8,NRM,State changed,from=WNP_CONNECTING;to=WNP_CONNECTED +08-01-13,21:23:30.745,4bf,b5c,0,NM,wnshttpesclient.cpp(229),332148,049B0BE4,ERR,,this=@049B0BE4;wm=0x2 +08-01-13,21:27:44.124,564,b5c,0,WNS,absconn.cpp(177),332148,02BCAE60,VRB,Received data from server,dwID=0x0;dwSize=0x3e;pbData=PNG 9 CON 48 + +44 diff --git a/test_data/skydriveerr.log b/test_data/skydriveerr.log new file mode 100755 index 0000000..28ad2d9 --- /dev/null +++ b/test_data/skydriveerr.log @@ -0,0 +1,28 @@ +######Logging started. Version=17.0.2011.0627 StartSystemTime:2013-07-25-160323.291 StartLocalTime:2013-07-25-180323.291 PID=0x8f4 TID=0x718 ContinuedFrom= +07-25-13,16:03:24.649,13,a98,0,AUTH,authapi.cpp(280),0,0,ERR,Sign in failed : DRX_E_AUTH_NO_VALID_CREDENTIALS, +07-25-13,16:03:24.649,15,718,0,PAL,systeminformationhelper.cpp(661),0,002AF7F8,ERR,The registry key to block Remote Access is not found.,System Error Code=0x2 +07-25-13,16:04:02.669,48,a98,0,AUTH,oauthprofile.cpp(268),0,0,ERR,No node found named PassportMemberName, no user name available, +07-25-13,16:06:31.306,cb,5d4,0,WNS,wnpconnmanager.cpp(176),7a93608,0,ERR,Failed to resolve proxy name. not using proxy,proxyName=;hr=8004da0e +07-25-13,16:06:31.321,f8,5c4,0,P2P,relayedtransportfactory.cpp(72),7a7ea58,0,ERR,JUMPONFAILURE Failed: ,msg=Runtime::GetProxyName(serverName, proxyName, port);hr=8004da0e +07-25-13,16:06:32.351,194,884,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +07-25-13,16:06:33.755,238,a98,0,DFA,ranotificationmanager.cpp(383),0,02A36DC8,ERR,Succeeded Macro failed,hr=8004da0e +######Logging started. 
Version=17.0.2011.0627 StartSystemTime:2013-07-25-174828.105 StartLocalTime:2013-07-25-194828.105 PID=0x90c TID=0x910 ContinuedFrom= +07-25-13,17:48:47.582,9e,aa0,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +07-25-13,17:48:49.189,13c,970,0,PAL,statemachine.cpp(274),2c2d5b0,0,ERR,No such transition defined,owner=@02C5A384;from=Closed;to=Closed +07-26-13,11:18:11.778,137,9ec,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +######Logging started. Version=17.0.2011.0627 StartSystemTime:2013-07-26-112344.929 StartLocalTime:2013-07-26-132344.929 PID=0x95c TID=0x960 ContinuedFrom= +07-26-13,12:25:36.611,be,838,0,PAL,runtime.cpp(219),3041de0,0,ERR,JUMPONFAILURE Failed: ,msg=HttpPAL::GetProxies(wszServerTarget, proxyList);hr=8004da0e +07-26-13,14:54:55.842,e3,a44,0,NM,notificationendpoint.cpp(24),0,0,ERR,JUMPONFAILURE Failed: ,msg=m_pReceiver->GetUris(queueUri, dgramUri, wlsSubscriptionUri);hr=8004da17 +07-26-13,12:59:25.172,174,a24,0,WNS,wnpconnmanager.cpp(268),651b70,0068CEB8,VRB,Sending command,data=CNT 1 CON 214 +Context: 48cc +Last-Msg-Id: 0 + +BND 3 CON 102 +Context: 48cc + +118542210062762168551983387045 +07-26-13,12:59:25.484,17e,a24,0,WNS,wnpnet.cpp(1060),651b70,03178DE8,NRM,State changed,from=WNP_CONNECTING;to=WNP_CONNECTED +08-01-13,21:23:30.745,4bf,b5c,0,NM,wnshttpesclient.cpp(229),332148,049B0BE4,ERR,,this=@049B0BE4;wm=0x2 +08-01-13,21:27:44.124,564,b5c,0,WNS,absconn.cpp(177),332148,02BCAE60,VRB,Received data from server,dwID=0x0;dwSize=0x3e;pbData=PNG 9 CON 48 + +44 diff --git a/test_data/skype_main.db b/test_data/skype_main.db new file mode 100644 index 0000000..5338cc6 Binary files /dev/null and b/test_data/skype_main.db differ diff --git a/test_data/snapshot.db b/test_data/snapshot.db new file mode 100644 index 0000000..e09777b Binary files /dev/null and b/test_data/snapshot.db differ diff --git a/test_data/syslog b/test_data/syslog new file mode 100644 index 0000000..8103798 --- /dev/null +++ b/test_data/syslog @@ -0,0 +1,16 @@ +Jan 22 07:52:33 myhostname.myhost.com client[30840]: INFO No new content. +Jan 22 07:52:33 myhostname.myhost.com client[30840]: INFO No change in [/etc/netgroup]. Done +Jan 22 07:53:01 myhostname.myhost.com CRON[31051]: (root) CMD (touch /var/run/crond.somecheck) +Jan 22 07:54:01 myhostname.myhost.com CRON[31068]: (root) CMD (touch /var/run/crond.somecheck) +Jan 22 07:54:01 myhostname.myhost.com CRON[31067]: (root) CMD (/sbin/status.mycheck)) +Jan 22 07:54:32 myhostname.myhost.com Job `cron.daily' terminated +Feb 29 01:15:43: --- testing leap year in parsing, events take place in 2012 --- +MMM 22 07:54:32 myhostname.myhost.com anacron[29782]: Normal exit (1 job run) +Dec 18 17:54:32 myhostname.myhost.com anacron[1234]: No true exit can exist (124 job run) +Mar 23 23:01:18 myhostname.myhost.com somrandomexe[1915]: This syslog message is brought to you by me (and not the other guy) +Mar 23 23:01:18.123 myhostname.myhost.com somrandomexe[19]: This syslog message has a fractional value for seconds. +Mar 23 Wrong line that should not be able to get through +Dec 31 17:54:32 myhostname.myhost.com anacron[1234]: Another one just like this (124 job run) +Nov 18 01:15:20 myhostname.myhost.com aprocess[101001]: This is a multi-line message that screws up + many syslog parsers. 
+Nov 18 01:15:43: --- last message repeated 5 times --- diff --git a/test_data/syslog.bz2 b/test_data/syslog.bz2 new file mode 100644 index 0000000..b318d76 Binary files /dev/null and b/test_data/syslog.bz2 differ diff --git a/test_data/syslog.gz b/test_data/syslog.gz new file mode 100644 index 0000000..b9770f5 Binary files /dev/null and b/test_data/syslog.gz differ diff --git a/test_data/syslog.tar b/test_data/syslog.tar new file mode 100644 index 0000000..b830c88 Binary files /dev/null and b/test_data/syslog.tar differ diff --git a/test_data/syslog.tgz b/test_data/syslog.tgz new file mode 100644 index 0000000..91430f4 Binary files /dev/null and b/test_data/syslog.tgz differ diff --git a/test_data/syslog.zip b/test_data/syslog.zip new file mode 100644 index 0000000..79ef41f Binary files /dev/null and b/test_data/syslog.zip differ diff --git a/test_data/syslog_copy b/test_data/syslog_copy new file mode 100644 index 0000000..d59a6bc --- /dev/null +++ b/test_data/syslog_copy @@ -0,0 +1,15 @@ +Jan 22 07:52:33 myhostname.myhost.com client[30840]: INFO No new content. +Jan 22 07:52:33 myhostname.myhost.com client[30840]: INFO No change in [/etc/netgroup]. Done +Jan 22 07:53:01 myhostname.myhost.com CRON[31051]: (root) CMD (touch /var/run/crond.somecheck) +Jan 22 07:54:01 myhostname.myhost.com CRON[31068]: (root) CMD (touch /var/run/crond.somecheck) +Jan 22 07:54:01 myhostname.myhost.com CRON[31067]: (root) CMD (/sbin/status.mycheck)) +Jan 22 07:54:32 myhostname.myhost.com Job `cron.daily' terminated +MMM 22 07:54:32 myhostname.myhost.com anacron[29782]: Normal exit (1 job run) +Dec 18 17:54:32 myhostname.myhost.com anacron[1234]: No true exit can exist (124 job run) +Mar 23 23:01:18 myhostname.myhost.com somrandomexe[1915]: This syslog message is brought to you by me (and not the other guy) +Mar 23 23:01:18.123 myhostname.myhost.com somrandomexe[19]: This syslog message has a fractional value for seconds. +Mar 23 Wrong line that should not be able to get through +Dec 31 17:54:32 myhostname.myhost.com anacron[1234]: Another one just like this (124 job run) +Nov 18 01:15:20 myhostname.myhost.com aprocess[101001]: This is a multi-line message that screws up + many syslog parsers. +Nov 18 01:15:43: --- last message repeated 5 times --- diff --git a/test_data/syslog_image.dd b/test_data/syslog_image.dd new file mode 100644 index 0000000..da514bc Binary files /dev/null and b/test_data/syslog_image.dd differ diff --git a/test_data/test.pcap b/test_data/test.pcap new file mode 100644 index 0000000..2f0887c Binary files /dev/null and b/test_data/test.pcap differ diff --git a/test_data/testdir/filter2.txt b/test_data/testdir/filter2.txt new file mode 100644 index 0000000..2e880b5 --- /dev/null +++ b/test_data/testdir/filter2.txt @@ -0,0 +1 @@ +Filter test file 2. diff --git a/test_data/testdir/filter_1.txt b/test_data/testdir/filter_1.txt new file mode 100644 index 0000000..9d569cf --- /dev/null +++ b/test_data/testdir/filter_1.txt @@ -0,0 +1 @@ +Filter test file 1. diff --git a/test_data/testdir/filter_3.txt b/test_data/testdir/filter_3.txt new file mode 100644 index 0000000..2392d5e --- /dev/null +++ b/test_data/testdir/filter_3.txt @@ -0,0 +1 @@ +Filter test file 3. diff --git a/test_data/text_parser/test1.txt b/test_data/text_parser/test1.txt new file mode 100644 index 0000000..9ed673d --- /dev/null +++ b/test_data/text_parser/test1.txt @@ -0,0 +1,3 @@ +first line. +second line. +third line. 
diff --git a/test_data/text_parser/test2.txt b/test_data/text_parser/test2.txt new file mode 100644 index 0000000..026889d --- /dev/null +++ b/test_data/text_parser/test2.txt @@ -0,0 +1,3 @@ +01/01/2011 05:23:15 myuser:myhost- first line. +12/24/1991 19:58:06 myuser:myhost- second line. +06/01/1945 08:20:00 myuser:myhost- third line. diff --git a/test_data/tsk_volume_system.raw b/test_data/tsk_volume_system.raw new file mode 100644 index 0000000..ce35902 Binary files /dev/null and b/test_data/tsk_volume_system.raw differ diff --git a/test_data/typed_history.xml b/test_data/typed_history.xml new file mode 100644 index 0000000..5d7df5b --- /dev/null +++ b/test_data/typed_history.xml @@ -0,0 +1,11 @@ + + + + + + + diff --git a/test_data/usage-history.xml b/test_data/usage-history.xml new file mode 100644 index 0000000..96dec3b --- /dev/null +++ b/test_data/usage-history.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test_data/user.plist b/test_data/user.plist new file mode 100644 index 0000000..f151668 Binary files /dev/null and b/test_data/user.plist differ diff --git a/test_data/utmp b/test_data/utmp new file mode 100644 index 0000000..55fd568 Binary files /dev/null and b/test_data/utmp differ diff --git a/test_data/utmpx_mac b/test_data/utmpx_mac new file mode 100644 index 0000000..a5de84b Binary files /dev/null and b/test_data/utmpx_mac differ diff --git a/test_data/vsstest.qcow2 b/test_data/vsstest.qcow2 new file mode 100644 index 0000000..4f4c434 Binary files /dev/null and b/test_data/vsstest.qcow2 differ diff --git a/test_data/wifi.log b/test_data/wifi.log new file mode 100644 index 0000000..a65e94c --- /dev/null +++ b/test_data/wifi.log @@ -0,0 +1,10 @@ +Thu Nov 14 20:14:37.123 ***Starting Up*** +Thu Nov 14 20:36:37.222 airportdProcessDLILEvent: en0 attached (up) +Thu Nov 14 20:36:43.818 _doAutoJoin: Already associated to “CampusNet”. Bailing on auto-join. +Thu Nov 14 21:50:52.395 _handleLinkEvent: Unable to process link event, op mode request returned -3903 (Operation not supported) +Thu Nov 14 21:52:04.230 _doAutoJoin: Already associated to “CampusNet”. Bailing on auto-join. +Thu Nov 14 21:52:04.363 _doAutoJoin: Already associated to “CampusNet”. Bailing on auto-join. +Thu Nov 14 21:52:09.883 _processSystemPSKAssoc: No password for network [ssid=AndroidAP, bssid=88:30:8a:7a:61:88, security=WPA2 Personal, rssi=-21, channel= [channelNumber=11(2GHz), channelWidth={20MHz}], ibss=0] in the system keychain +Thu Nov 14 21:52:30.737 _doAutoJoin: Already associated to “AndroidAP”. Bailing on auto-join. +Tue Dec 31 23:59:38.165 _doAutoJoin: Already associated to “AndroidAP”. Bailing on auto-join. +Wed Jan 1 01:12:17.311 _doAutoJoin: Already associated to “AndroidAP”. Bailing on auto-join. 
diff --git a/test_data/wintask.job b/test_data/wintask.job new file mode 100644 index 0000000..3304166 Binary files /dev/null and b/test_data/wintask.job differ diff --git a/test_data/wtmp.1 b/test_data/wtmp.1 new file mode 100644 index 0000000..d8e72b2 Binary files /dev/null and b/test_data/wtmp.1 differ diff --git a/test_data/xchat.log b/test_data/xchat.log new file mode 100755 index 0000000..daf0a79 --- /dev/null +++ b/test_data/xchat.log @@ -0,0 +1,22 @@ +**** BEGIN LOGGING AT Mon Dec 31 21:11:55 2011 + +dec 31 21:11:55 --> You are now talking on #gugle + dec 31 21:11:55 --- Topic for #gugle is plaso, a difficult word +dec 31 21:11:55 Topic for #gugle set by Kristinn +dec 31 21:11:55 --- Joachim gives voice to fpi +dec 31 21:11:55 * XChat here +dec 31 21:11:58 ola plas-ing guys! +dec 31 23:00:00 日本 +dec 31 39:95:90 uh oh wronggg date! + +**** END LOGGING AT Mon Dec 31 23:59:00 2011 + +**** INIZIO DEL LOG Sun Feb 26 19:52:46 2012 + +feb 26 19:52:51 * ST=Mosca +feb 26 19:53:01 ciao plaso! Com'è?! +feb 26 19:54:00 ftw (it :p)? +feb 26 19:56:01 + +**** FINE DEL LOG Sun Feb 26 19:52:52 2012 + diff --git a/test_data/xchatscrollback.log b/test_data/xchatscrollback.log new file mode 100644 index 0000000..1464ec6 --- /dev/null +++ b/test_data/xchatscrollback.log @@ -0,0 +1,11 @@ +T 1232074579 19* 19Speaking now on ##plaso## +T 1232074587 23* Joachim è uscito (23Client exited23) +T 1232315916 Tcl interface unloaded +T 1232315916 Python interface unloaded +T MISSING TIMESTAMP +T 1232959856 19* 19Talking on #plasify +T 0 0 is a good timestamp +T 1232959856 29* 29Topic of #plasify 29è: . +T 1232959862 22* Kristinn is know |THE_HARSH_REVIEWER| +T 1232959932 31<fpi>30Hi Kristinn! +T 1232959993 31<Kristinn>30 GO AND WRITE PARSERS!!! O_o diff --git a/test_data/ímynd.dd b/test_data/ímynd.dd new file mode 100644 index 0000000..4dd70b7 Binary files /dev/null and b/test_data/ímynd.dd differ diff --git a/tools/README.tools b/tools/README.tools new file mode 100644 index 0000000..ad5b9b1 --- /dev/null +++ b/tools/README.tools @@ -0,0 +1,8 @@ +This folder contains a few extra nuggets: scripts that use the plaso +libraries in some way, or other files that can be used with plaso. In other +words, useful tools that use the underlying infrastructure to perform actions +that are not part of the original design. + +There is no formal setup file for any of the scripts here; they are mostly +provided as a proof of concept showing what can be done using the plaso +libraries to extend the tool. diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100755 index 0000000..f462564 --- /dev/null +++ b/tools/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
diff --git a/tools/plaso_extract_search_history.py b/tools/plaso_extract_search_history.py new file mode 100755 index 0000000..1c15d4c --- /dev/null +++ b/tools/plaso_extract_search_history.py @@ -0,0 +1,242 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright 2013 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Extract search history from a plaso storage file and enjoy a cup of tea. + +A very simple script that takes as input a plaso storage file +and then tries to extract common search engine history from it and spit +it out to your lovely little screen or a file of your choosing. +""" +import argparse +import locale +import logging +import os +import sys +import urllib + +# pylint: disable=unused-import +from plaso import filters +from plaso import formatters + +from plaso.lib import output +from plaso.lib import storage + +# Here we define filters and callback methods for all hits on each filter. +FILTERS = ( +    (('source is "WEBHIST" and url iregexp "(www.|encrypted.|/)google." and ' +      'url contains "search"'), 'GoogleSearch'), +    ('source is "WEBHIST" and url contains "youtube.com"', 'YouTube'), +    (('source is "WEBHIST" and url contains "bing.com" and url contains ' +      '"search"'), 'BingSearch'), +    ('source is "WEBHIST" and url contains "mail.google.com"', 'Gmail'), +    (('source is "WEBHIST" and url contains "yandex.com" and url contains ' +      '"yandsearch"'), 'Yandex'), +    ('source is "WEBHIST" and url contains "duckduckgo.com"', 'DuckDuckGo') +) + + +def ScrubLine(line): +  """Scrub the line of the most obvious URL-encoded (%XX) codes. + +  An attempt at taking a line and swapping all instances +  of %XX, which represent a character in hex, with its +  Unicode character. + +  Args: +    line: The string that we are about to "fix". + +  Returns: +    String that has its %XX hex codes swapped for text.
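+    For example (an illustration added here, not part of the original +    docstring), the input 'caf%C3%A9' is returned as u'café'.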
+ """ + if not line: + return '' + + if not '%' in line: + return line + + try: + return unicode(urllib.unquote(str(line)), 'utf-8') + except UnicodeDecodeError: + logging.warning(u'Unable to decode line: {0:s}'.format(line)) + + return line + + +class FilterClass(object): + """A class that contains all the parser functions.""" + + @classmethod + def _GetBetweenQEqualsAndAmbersand(cls, string): + """Return back string that is defined 'q=' and '&'.""" + if 'q=' not in string: + return string + _, _, line = string.partition('q=') + before_and, _, _ = line.partition('&') + if not before_and: + return line + return before_and.split()[0] + + @classmethod + def _SearchAndQInLine(cls, string): + """Return a bool indicating if the words q= and search appear in string.""" + if 'search' not in string: + return False + + if 'q=' not in string: + return False + + return True + + @classmethod + def GoogleSearch(cls, url): + """Return back the extracted string.""" + if not cls._SearchAndQInLine(url): + return + + line = cls._GetBetweenQEqualsAndAmbersand(url) + if not line: + return + + return line.replace('+', ' ') + + @classmethod + def YouTube(cls, url): + """Return back the extracted string.""" + return cls.GenericSearch(url) + + @classmethod + def BingSearch(cls, url): + """Return back the extracted string.""" + return cls.GenericSearch(url) + + @classmethod + def GenericSearch(cls, url): + """Return back the extracted string from a generic search engine.""" + if not cls._SearchAndQInLine(url): + return + + return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ') + + @classmethod + def Yandex(cls, url): + """Return back the results from Yandex search engine.""" + if 'text=' not in url: + return + _, _, line = url.partition('text=') + before_and, _, _ = line.partition('&') + if not before_and: + return + yandex_search_url = before_and.split()[0] + + return yandex_search_url.replace('+', ' ') + + @classmethod + def DuckDuckGo(cls, url): + """Return back the extracted string.""" + if not 'q=' in url: + return + return cls._GetBetweenQEqualsAndAmbersand(url).replace('+', ' ') + + @classmethod + def Gmail(cls, url): + """Return back the extracted string.""" + if 'search/' not in url: + return + + _, _, line = url.partition('search/') + first, _, _ = line.partition('/') + second, _, _ = first.partition('?compose') + + return second.replace('+', ' ') + + +def Main(): + """Run the tool.""" + arg_parser = argparse.ArgumentParser( + description=( + 'plaso_extract_search_history is a simple script that reads the ' + 'content of a plaso storage file and tries to extract known search ' + 'engine history from it')) + + arg_parser.add_argument( + '-w', '--write', metavar='FILENAME', action='store', dest='output_file', + default='', help='Write results to a file.') + + arg_parser.add_argument( + 'filename', action='store', metavar='STORAGE_FILE', help=( + 'The path to the plaso storage file.')) + + options = arg_parser.parse_args() + preferred_encoding = locale.getpreferredencoding() + if preferred_encoding.lower() == 'ascii': + preferred_encoding = 'utf-8' + + if not os.path.isfile(options.filename): + raise RuntimeError(u'File {} does not exist'.format(options.filename)) + + results = {} + result_count = {} + + output_filehandle = output.OutputFilehandle(preferred_encoding) + if options.output_file: + output_filehandle.Open(path=options.output_file) + else: + output_filehandle.Open(sys.stdout) + + # Build filters. 
+  filter_dict = {} +  for filter_str, call_back in FILTERS: +    filter_obj = filters.GetFilter(filter_str) +    call_back_obj = getattr(FilterClass, call_back, None) +    results[call_back] = [] +    if filter_obj and call_back_obj: +      filter_dict[filter_obj] = (call_back, call_back_obj) + +  with storage.StorageFile(options.filename, read_only=True) as store: +    event_object = store.GetSortedEntry() +    while event_object: +      for filter_obj, call_backs in filter_dict.items(): +        call_back_name, call_back_object = call_backs +        if filter_obj.Match(event_object): +          url_attribute = getattr(event_object, 'url', None) +          if not url_attribute: +            continue +          ret_line = ScrubLine(call_back_object(url_attribute)) +          if not ret_line: +            continue +          if ret_line in results[call_back_name]: +            result_count[u'{}:{}'.format(call_back_name, ret_line)] += 1 +          else: +            results[call_back_name].append(ret_line) +            result_count[u'{}:{}'.format(call_back_name, ret_line)] = 1 +      event_object = store.GetSortedEntry() + +  for engine_name, result_list in results.items(): +    results_with_count = [] +    for result in result_list: +      results_with_count.append(( +          result_count[u'{}:{}'.format(engine_name, result)], result)) + +    header = u' == ENGINE: {0:s} ==\n'.format(engine_name) +    output_filehandle.WriteLine(header) +    for count, result in sorted(results_with_count, reverse=True): +      line = u'{} {}\n'.format(count, result) +      output_filehandle.WriteLine(line) +    output_filehandle.WriteLine('\n') + + +if __name__ == '__main__': +  Main() diff --git a/tools/plaso_process_info.py b/tools/plaso_process_info.py new file mode 100644 index 0000000..81f78e0 --- /dev/null +++ b/tools/plaso_process_info.py @@ -0,0 +1,254 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple tool that provides an overview of running log2timeline processes. + +The tool iterates over all processes running on the system looking for ones +running log2timeline. For each one it finds, it prints out information +detected from that process. + +There is also an option to drop into an IPython shell to further interact with +the process, giving the user the option to, for instance, terminate processes +that are in a zombie state. +""" + +import argparse +import IPython +import sys +import textwrap + +import psutil + +from plaso.frontend import frontend +from plaso.multi_processing import process_info + + +def IsWorkerProcess(process): +  """Checks whether a process is a worker process. + +  Args: +    process: A process object (instance of ProcessInfo). + +  Returns: +    A boolean value indicating whether or not the process is a worker. +  """ +  # The parent needs to be log2timeline. +  if 'log2timeline' not in process.parent.name: +    return False + +  # If it has an active RPC server then we know for sure.
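+  # Note: GetProcessStatus() queries the process over RPC; since only +  # log2timeline worker processes run that RPC server, a response positively +  # identifies a worker.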
+  rpc_status = process.GetProcessStatus() +  if rpc_status: +    return True + +  # We still want to continue checking, in case the RPC +  # server was not working. +  # TODO: Add additional tests to verify this is a worker, +  # perhaps look at libraries loaded, etc. +  return False + + +class ProcessInformationFrontend(frontend.Frontend): +  """A frontend implementation for the process information tool.""" + +  def __init__(self): +    """Initialize the process information frontend.""" +    self._input_reader = frontend.StdinFrontendInputReader() +    self._output_writer = frontend.StdoutFrontendOutputWriter() +    self._parent_list = [] +    self._process_list = [] + +    super(ProcessInformationFrontend, self).__init__( +        self._input_reader, self._output_writer) + +  def PrintRPCDetails(self, process): +    """Print RPC status information for a running process. + +    Args: +      process: A process object (instance of ProcessInfo). +    """ +    self._output_writer.Write(u'RPC Status:\n') +    rpc_status = process.GetProcessStatus() +    if rpc_status: +      for key, value in rpc_status.iteritems(): +        self._output_writer.Write(u'\t{0:s} = {1!s}\n'.format(key, value)) +    else: +      self._output_writer.Write(u'\tNo RPC client listening.\n') + +  def PrintProcessDetails(self, process): +    """Print detailed information about a running process. + +    Args: +      process: A process object (instance of ProcessInfo). +    """ +    mem_info = process.GetMemoryInformation() + +    self.PrintSeparatorLine() +    self._output_writer.Write(u'\n{0:20s}{1:s} [{2:d}]\n'.format( +        u'', process.name, process.pid)) +    self.PrintSeparatorLine() +    self.PrintHeader(u'Basic Information') +    self._output_writer.Write(u'Name:\n\t{0:s}\n'.format(process.name)) +    self._output_writer.Write(u'PID:\n\t{0:d}\n'.format(process.pid)) +    self._output_writer.Write(u'Command Line:\n\t{0:s}\n'.format( +        process.command_line)) +    self._output_writer.Write(u'Process Alive:\n\t{0!s}\n'.format( +        process.IsAlive())) +    self._output_writer.Write(u'Process Status:\n\t{0:s}\n'.format( +        process.status)) + +    is_a_worker = IsWorkerProcess(process) +    if is_a_worker: +      self._output_writer.Write(u'This is a worker process.\n') +    else: +      self._output_writer.Write(u'This is NOT a worker.\n') + +    self._output_writer.Write(u'\n') +    self.PrintHeader(u' * Additional Information') +    self._output_writer.Write(u'Parent PID:\n\t{0:d} ({1:s})\n'.format( +        process.parent.pid, process.parent.name)) +    self._output_writer.Write(u'Children:\n') +    for child in process.children: +      self._output_writer.Write(u'\t{0:d} [{1:s}]\n'.format( +          child.pid, child.name)) + +    if is_a_worker: +      self.PrintRPCDetails(process) + +    self._output_writer.Write('Nr. of Threads:\n\t{0:d}\n'.format( +        process.number_of_threads)) + +    self._output_writer.Write('Open files:\n') +    for open_file in process.open_files: +      self._output_writer.Write(u'\t{0:s}\n'.format(open_file)) + +    self._output_writer.Write(u'Memory:\n') +    # We need to access a protected attribute to get the +    # name of all the fields in the memory object.
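+    # Note: mem_info is a namedtuple returned by psutil (with fields such +    # as rss and vms); _fields is the standard namedtuple attribute that +    # lists the member names.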
+    # pylint: disable=protected-access +    for field in mem_info._fields: +      self._output_writer.Write(u'\t{0:s} = {1!s}\n'.format( +          field, getattr(mem_info, field, u''))) + +    self._output_writer.Write('Memory map: \n') +    for memory_map in process.memory_map: +      self._output_writer.Write(u'\t{0:s}\n'.format(memory_map.path)) + +  def BuildProcessList(self): +    """Build a list of processes.""" +    for process_object in psutil.get_process_list(): +      # TODO: This may catch other processes, such as "vim +      # foo/log2timeline/foo.py", since that string is in the command line. +      # However, running "python log2timeline.py" causes the older approach +      # of matching on the process name to fail. +      try: +        command_line = u' '.join(process_object.cmdline) +      # pylint: disable=protected-access +      except psutil._error.AccessDenied: +        continue +      if 'log2timeline' in command_line: +        process_details = process_info.ProcessInfo(pid=process_object.pid) +        self._process_list.append(process_details) +        parent_process = process_details.parent +        children = list(process_details.children) +        if 'log2timeline' not in parent_process.name and len(children): +          self._parent_list.append(process_details) + +  def TerminateWorkers(self): +    """Terminate all detected worker processes.""" +    for process_object in self._process_list: +      # Find out which process is a worker and which one isn't. +      if IsWorkerProcess(process_object): +        self._output_writer.Write( +            u'Killing process: {0:s} [{1:d}] - {2:s}\n'.format( +                process_object.name, process_object.pid, +                process_object.status)) +        process_object.TerminateProcess() + +  def ListProcesses(self): +    """Print an overview of all detected log2timeline processes.""" +    if self._parent_list: +      self._output_writer.Write(u'Main process (careful before killing):\n') +      for parent_process in self._parent_list: +        if parent_process.IsAlive(): +          status = u'Alive' +        else: +          status = u'Dead' + +        self._output_writer.Write(( +            u'{4}\n\tPid: {1:d}\n\tCommand Line: {0:s}\n\tStatus:{2} ' +            u'<{3:s}>\n{4:s}\n').format( +                parent_process.command_line, parent_process.pid, +                status, parent_process.status, u'-'*40)) +      self._output_writer.Write(u'\n') + +    if not self._process_list: +      self._output_writer.Write( +          u'No processes discovered. Are you sure log2timeline is running?\n') +      return + +    self._output_writer.Write(u'='*80) +    self._output_writer.Write(u'\n\t\tDiscovered Processes\n') +    self._output_writer.Write(u'='*80) +    self._output_writer.Write(u'\n') +    for process_object in self._process_list: +      self.PrintProcessDetails(process_object) + + +def Main(): +  """Read parameters and run the tool.""" +  front_end = ProcessInformationFrontend() + +  description = ( +      u'A simple tool that tries to list all processes that belong to ' +      u'log2timeline. Once a process is detected it will print out ' +      u'statistical information about it, as well as providing an option ' +      u'to attempt to "kill" worker processes.') +  arg_parser = argparse.ArgumentParser( +      description=textwrap.dedent(description)) + +  arg_parser.add_argument( +      '-c', '--console', dest='console', action='store_true', default=False, +      help=u'Open up an IPython console.') + +  arg_parser.add_argument( +      '-k', '--kill-workers', '--kill_workers', dest='kill_workers', +      action='store_true', default=False, help=( +          u'The tool does a rudimentary check to discover worker processes ' +          u'and terminates those it finds. This can be used in the case ' +          u'where the tool is stuck due to a non-functioning worker that ' +          u'prevents the tool from completing its processing.')) + +  # TODO: Add an option to specify certain parent if we are killing workers.
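+  # Example invocations (illustrative, based on the arguments defined above): +  #   plaso_process_info.py                  list log2timeline processes +  #   plaso_process_info.py --kill-workers   terminate detected workers +  #   plaso_process_info.py -c               drop into an IPython console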
+  options = arg_parser.parse_args() + +  front_end.BuildProcessList() + +  if options.console: +    IPython.embed() +    return True + +  if options.kill_workers: +    front_end.TerminateWorkers() +  else: +    front_end.ListProcesses() + +  return True + + +if __name__ == '__main__': +  if not Main(): +    sys.exit(1) +  else: +    sys.exit(0) diff --git a/utils/build_dependencies.py b/utils/build_dependencies.py new file mode 100755 index 0000000..fdb0c69 --- /dev/null +++ b/utils/build_dependencies.py @@ -0,0 +1,2789 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Script to automate creating builds of plaso dependencies.""" + +import abc +import argparse +import fileinput +import glob +import io +import json +import logging +import os +import platform +import re +import shutil +import subprocess +import sys +import tarfile +import time +import urllib2 + +try: +  import ConfigParser as configparser +except ImportError: +  import configparser + + +# Since os.path.abspath() uses the current working directory (cwd) +# os.path.abspath(__file__) will point to a different location if +# cwd has been changed. Hence we preserve the absolute location of __file__. +__file__ = os.path.abspath(__file__) + + +# TODO: look into merging functionality with update dependencies script. + + +class DependencyDefinition(object): +  """Class that implements a dependency definition.""" + +  def __init__(self, name): +    """Initializes the dependency definition. + +    Args: +      name: the name of the dependency. +    """ +    self.description_long = None +    self.description_short = None +    self.dpkg_dependencies = None +    self.dpkg_name = None +    self.download_url = None +    self.homepage_url = None +    self.maintainer = None +    self.name = name + + +class DependencyDefinitionReader(object): +  """Class that implements a dependency definition reader.""" + +  def _GetConfigValue(self, config_parser, section_name, value_name): +    """Retrieves a value from the config parser. + +    Args: +      config_parser: the configuration parser (instance of ConfigParser). +      section_name: the name of the section that contains the value. +      value_name: the name of the value. + +    Returns: +      An object containing the value or None if the value does not exist. +    """ +    try: +      return config_parser.get(section_name, value_name) +    except configparser.NoOptionError: +      return + +  def Read(self, file_object): +    """Reads dependency definitions. + +    Args: +      file_object: the file-like object to read from. + +    Yields: +      Dependency definitions (instances of DependencyDefinition). +    """ +    # TODO: replace by: +    # config_parser = configparser.
ConfigParser(interpolation=None) +    config_parser = configparser.RawConfigParser() +    config_parser.readfp(file_object) + +    for section_name in config_parser.sections(): +      dependency_definition = DependencyDefinition(section_name) +      dependency_definition.description_long = self._GetConfigValue( +          config_parser, section_name, 'description_long') +      dependency_definition.description_short = self._GetConfigValue( +          config_parser, section_name, 'description_short') +      dependency_definition.dpkg_dependencies = self._GetConfigValue( +          config_parser, section_name, 'dpkg_dependencies') +      dependency_definition.dpkg_name = self._GetConfigValue( +          config_parser, section_name, 'dpkg_name') +      dependency_definition.download_url = self._GetConfigValue( +          config_parser, section_name, 'download_url') +      dependency_definition.homepage_url = self._GetConfigValue( +          config_parser, section_name, 'homepage_url') +      dependency_definition.maintainer = self._GetConfigValue( +          config_parser, section_name, 'maintainer') + +      # Need at minimum a name and a download URL. +      if dependency_definition.name and dependency_definition.download_url: +        yield dependency_definition + + +class DownloadHelper(object): +  """Class that helps in downloading a project.""" + +  def __init__(self): +    """Initializes the download helper.""" +    super(DownloadHelper, self).__init__() +    self._cached_url = u'' +    self._cached_page_content = '' + +  def Download(self, project_name, project_version): +    """Downloads the project for a given project name and version. + +    Args: +      project_name: the name of the project. +      project_version: the version of the project. + +    Returns: +      The filename if successful (also if the file was already downloaded), +      or None on error. +    """ +    download_url = self.GetDownloadUrl(project_name, project_version) +    if not download_url: +      logging.warning(u'Unable to determine download URL for: {0:s}'.format( +          project_name)) +      return + +    return self.DownloadFile(download_url) + +  def DownloadFile(self, download_url): +    """Downloads a file from the URL and returns the filename. + +    The filename is extracted from the last part of the URL. + +    Args: +      download_url: the URL from which to download the file. + +    Returns: +      The filename if successful (also if the file was already downloaded), +      or None on error. +    """ +    _, _, filename = download_url.rpartition(u'/') + +    if not os.path.exists(filename): +      logging.info(u'Downloading: {0:s}'.format(download_url)) + +      url_object = urllib2.urlopen(download_url) +      if url_object.code != 200: +        return + +      file_object = open(filename, 'wb') +      file_object.write(url_object.read()) +      file_object.close() + +    return filename + +  def DownloadPageContent(self, download_url): +    """Downloads the page content from the URL and caches it. + +    Args: +      download_url: the URL from which to download the page content. + +    Returns: +      The page content if successful, None otherwise. +    """ +    if not download_url: +      return + +    if self._cached_url != download_url: +      url_object = urllib2.urlopen(download_url) + +      if url_object.code != 200: +        return + +      self._cached_page_content = url_object.read() +      self._cached_url = download_url + +    return self._cached_page_content + +  @abc.abstractmethod +  def GetDownloadUrl(self, project_name, project_version): +    """Retrieves the download URL for a given project name and version. + +    Args: +      project_name: the name of the project. +      project_version: the version of the project. + +    Returns: +      The download URL of the project or None on error.
+ """ + + @abc.abstractmethod + def GetProjectIdentifier(self, project_name): + """Retrieves the project identifier for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The project identifier or None on error. + """ + + +class GoogleCodeWikiDownloadHelper(DownloadHelper): + """Class that helps in downloading a wiki-based Google code project.""" + + def GetLatestVersion(self, project_name): + """Retrieves the latest version number for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The a string containing the latest version number or None on error. + """ + download_url = u'https://code.google.com/p/{0:s}/downloads/list'.format( + project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return + + # The format of the project download URL is: + # href="//{project name}.googlecode.com/files/ + # {project name}-{version}.tar.gz + expression_string = ( + u'href="//{0:s}.googlecode.com/files/' + u'{0:s}-([0-9]+[.][0-9]+|[0-9]+[.][0-9]+[.][0-9]+)[.]tar[.]gz').format( + project_name) + matches = re.findall(expression_string, page_content) + + if not matches: + return + + # Split the version string and convert every digit into an integer. + # A string compare of both version strings will yield an incorrect result. + matches = [map(int, match.split(u'.')) for match in matches] + + # Find the latest version number and transform it back into a string. + return u'.'.join([u'{0:d}'.format(digit) for digit in max(matches)]) + + def GetDownloadUrl(self, project_name, project_version): + """Retrieves the download URL for a given project name and version. + + Args: + project_name: the name of the project. + project_version: the version of the project. + + Returns: + The download URL of the project or None on error. + """ + return ( + u'https://{0:s}.googlecode.com/files/{0:s}-{1:s}.tar.gz').format( + project_name, project_version) + + def GetProjectIdentifier(self, project_name): + """Retrieves the project identifier for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The project identifier or None on error. + """ + return u'com.google.code.p.{0:s}'.format(project_name) + + +class GithubReleasesDownloadHelper(DownloadHelper): + """Class that helps in downloading a project with GitHub releases.""" + + def __init__(self, organization): + """Initializes the download helper. + + Args: + organization: the github organization or user name. + """ + super(GithubReleasesDownloadHelper, self).__init__() + self.organization = organization + + def GetLatestVersion(self, project_name): + """Retrieves the latest version number for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The latest version number or 0 on error. + """ + download_url = u'https://github.com/{0:s}/{1:s}/releases'.format( + self.organization, project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return 0 + + # The format of the project download URL is: + # /{organization}/{project name}/releases/download/{git tag}/ + # {project name}{status-}{version}.tar.gz + # Note that the status is optional and will be: beta, alpha or experimental. 
+ expression_string = ( + u'/{0:s}/{1:s}/releases/download/[^/]*/{1:s}-[a-z-]*([0-9]+)' + u'[.]tar[.]gz').format(self.organization, project_name) + matches = re.findall(expression_string, page_content) + + if not matches: + return 0 + + return int(max(matches)) + + def GetDownloadUrl(self, project_name, project_version): + """Retrieves the download URL for a given project name and version. + + Args: + project_name: the name of the project. + project_version: the version of the project. + + Returns: + The download URL of the project or None on error. + """ + download_url = u'https://github.com/{0:s}/{1:s}/releases'.format( + self.organization, project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return + + # The format of the project download URL is: + # /{organization}/{project name}/releases/download/{git tag}/ + # {project name}{status-}{version}.tar.gz + # Note that the status is optional and will be: beta, alpha or experimental. + expression_string = ( + u'/{0:s}/{1:s}/releases/download/[^/]*/{1:s}-[a-z-]*{2!s}' + u'[.]tar[.]gz').format(self.organization, project_name, project_version) + matches = re.findall(expression_string, page_content) + + if len(matches) != 1: + # Try finding a match without the status in case the project provides + # multiple versions with a different status. + expression_string = ( + u'/{0:s}/{1:s}/releases/download/[^/]*/{1:s}-*{2!s}' + u'[.]tar[.]gz').format( + self.organization, project_name, project_version) + matches = re.findall(expression_string, page_content) + + if not matches or len(matches) != 1: + return + + return u'https://github.com{0:s}'.format(matches[0]) + + def GetProjectIdentifier(self, project_name): + """Retrieves the project identifier for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The project identifier or None on error. + """ + return u'com.github.{0:s}.{1:s}'.format(self.organization, project_name) + + +class GoogleDriveDownloadHelper(DownloadHelper): + """Class that helps in downloading a Google Drive hosted project.""" + + @abc.abstractmethod + def GetGoogleDriveDownloadsUrl(self, project_name): + """Retrieves the Google Drive Download URL. + + Args: + project_name: the name of the project. + + Returns: + The downloads URL or None on error. + """ + + def GetLatestVersion(self, project_name): + """Retrieves the latest version number for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The latest version number or 0 on error. + """ + download_url = self.GetGoogleDriveDownloadsUrl(project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return 0 + + # The format of the project download URL is: + # /host/{random string}/{project name}-{status-}{version}.tar.gz + # Note that the status is optional and will be: beta, alpha or experimental. + expression_string = u'/host/[^/]*/{0:s}-[a-z-]*([0-9]+)[.]tar[.]gz'.format( + project_name) + matches = re.findall(expression_string, page_content) + + if not matches: + return 0 + + return int(max(matches)) + + def GetDownloadUrl(self, project_name, project_version): + """Retrieves the download URL for a given project name and version. + + Args: + project_name: the name of the project. + project_version: the version of the project. + + Returns: + The download URL of the project or None on error. 
+ """ + download_url = self.GetGoogleDriveDownloadsUrl(project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return + + # The format of the project download URL is: + # /host/{random string}/{project name}-{status-}{version}.tar.gz + # Note that the status is optional and will be: beta, alpha or experimental. + expression_string = u'/host/[^/]*/{0:s}-[a-z-]*{1!s}[.]tar[.]gz'.format( + project_name, project_version) + matches = re.findall(expression_string, page_content) + + if len(matches) != 1: + # Try finding a match without the status in case the project provides + # multiple versions with a different status. + expression_string = u'/host/[^/]*/{0:s}-{1!s}[.]tar[.]gz'.format( + project_name, project_version) + matches = re.findall(expression_string, page_content) + + if not matches or len(matches) != 1: + return + + return u'https://googledrive.com{0:s}'.format(matches[0]) + + +# TODO: Merge with LibyalGithubReleasesDownloadHelper when Google Drive +# support is no longer needed. +# pylint: disable=abstract-method +class LibyalGitHubDownloadHelper(DownloadHelper): + """Class that helps in downloading a libyal GitHub project.""" + + def __init__(self): + """Initializes the download helper.""" + super(LibyalGitHubDownloadHelper, self).__init__() + self._download_helper = None + + def GetWikiConfigurationSourcePackageUrl(self, project_name): + """Retrieves the source package URL from the libyal wiki configuration. + + Args: + project_name: the name of the project. + + Returns: + The source package URL or None on error. + """ + download_url = ( + u'https://raw.githubusercontent.com/libyal/{0:s}/master/' + u'{0:s}-wiki.ini').format(project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return + + config_parser = configparser.RawConfigParser() + config_parser.readfp(io.BytesIO(page_content)) + + return json.loads(config_parser.get('source_package', 'url')) + + def GetLatestVersion(self, project_name): + """Retrieves the latest version number for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The latest version number or 0 on error. + """ + if not self._download_helper: + download_url = self.GetWikiConfigurationSourcePackageUrl(project_name) + + if download_url.startswith('https://github.com'): + self._download_helper = LibyalGithubReleasesDownloadHelper() + + elif download_url.startswith('https://googledrive.com'): + self._download_helper = LibyalGoogleDriveDownloadHelper(download_url) + + return self._download_helper.GetLatestVersion(project_name) + + def GetDownloadUrl(self, project_name, project_version): + """Retrieves the download URL for a given project name and version. + + Args: + project_name: the name of the project. + project_version: the version of the project. + + Returns: + The download URL of the project or None on error. 
+ """ + if not self._download_helper: + download_url = self.GetWikiConfigurationSourcePackageUrl(project_name) + + if download_url.startswith('https://github.com'): + self._download_helper = LibyalGithubReleasesDownloadHelper() + + elif download_url.startswith('https://googledrive.com'): + self._download_helper = LibyalGoogleDriveDownloadHelper(download_url) + + return self._download_helper.GetDownloadUrl(project_name, project_version) + + +class LibyalGoogleDriveDownloadHelper(GoogleDriveDownloadHelper): + """Class that helps in downloading a libyal project with Google Drive.""" + + def __init__(self, google_drive_url): + """Initializes the download helper. + + Args: + google_drive_url: the project Google Drive URL. + """ + super(LibyalGoogleDriveDownloadHelper, self).__init__() + self._google_drive_url = google_drive_url + + def GetGoogleDriveDownloadsUrl(self, project_name): + """Retrieves the Download URL from the GitHub project page. + + Args: + project_name: the name of the project. + + Returns: + The downloads URL or None on error. + """ + return self._google_drive_url + + +class LibyalGithubReleasesDownloadHelper(GithubReleasesDownloadHelper): + """Class that helps in downloading a libyal project with GitHub releases.""" + + def __init__(self): + """Initializes the download helper.""" + super(LibyalGithubReleasesDownloadHelper, self).__init__('libyal') + + +class Log2TimelineGitHubDownloadHelper(GithubReleasesDownloadHelper): + """Class that helps in downloading a log2timeline GitHub project.""" + + def __init__(self): + """Initializes the download helper.""" + super(Log2TimelineGitHubDownloadHelper, self).__init__('log2timeline') + + +class PyPiDownloadHelper(DownloadHelper): + """Class that helps in downloading a pypi code project.""" + + def GetLatestVersion(self, project_name): + """Retrieves the latest version number for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The a string containing the latest version number or None on error. + """ + # TODO: add support to handle index of packages pages, e.g. for pyparsing. + download_url = u'https://pypi.python.org/pypi/{0:s}'.format(project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return + + # The format of the project download URL is: + # https://pypi.python.org/packages/source/{first letter project name}/ + # {project name}/{project name}-{version}.tar.gz + expression_string = ( + u'https://pypi.python.org/packages/source/{0:s}/{1:s}/' + u'{1:s}-([0-9]+[.][0-9]+|[0-9]+[.][0-9]+[.][0-9]+)[.]tar[.]gz').format( + project_name[0], project_name) + matches = re.findall(expression_string, page_content) + + if not matches: + return + + # Split the version string and convert every digit into an integer. + # A string compare of both version strings will yield an incorrect result. + matches = [map(int, match.split(u'.')) for match in matches] + + # Find the latest version number and transform it back into a string. + return u'.'.join([u'{0:d}'.format(digit) for digit in max(matches)]) + + def GetDownloadUrl(self, project_name, project_version): + """Retrieves the download URL for a given project name and version. + + Args: + project_name: the name of the project. + project_version: the version of the project. + + Returns: + The download URL of the project or None on error. 
+ """ + return ( + u'https://pypi.python.org/packages/source/{0:s}/{1:s}/' + u'{1:s}-{2:s}.tar.gz').format( + project_name[0], project_name, project_version) + + def GetProjectIdentifier(self, project_name): + """Retrieves the project identifier for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The project identifier or None on error. + """ + return u'org.python.pypi.{0:s}'.format(project_name) + + +class SourceForgeDownloadHelper(DownloadHelper): + """Class that helps in downloading a Source Forge project.""" + + def GetLatestVersion(self, project_name): + """Retrieves the latest version number for a given project name. + + Args: + project_name: the name of the project. + + Returns: + The a string containing the latest version number or None on error. + """ + # TODO: make this more robust to detect different naming schemes. + download_url = 'http://sourceforge.net/projects/{0:s}/files/{0:s}/'.format( + project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return 0 + + # The format of the project download URL is: + # /projects/{project name}/files/{project name}/{project name}-{version}/ + expression_string = ( + '' + + _DOCS_FILENAMES = [ + u'CHANGES', u'CHANGES.txt', u'CHANGES.TXT', + u'LICENSE', u'LICENSE.txt', u'LICENSE.TXT', + u'README', u'README.txt', u'README.TXT'] + + _CHANGELOG_TEMPLATE = u'\n'.join([ + u'python-{project_name:s} ({project_version!s}-1) unstable; urgency=low', + u'', + u' * Auto-generated', + u'', + u' -- {maintainer_email_address:s} {date_time:s}']) + + _COMPAT_TEMPLATE = u'\n'.join([ + u'7']) + + _CONTROL_TEMPLATE = u'\n'.join([ + u'Source: python-{project_name:s}', + u'Section: misc', + u'Priority: extra', + u'Maintainer: {upstream_maintainer:s}', + u'Build-Depends: debhelper (>= 7), python, python-setuptools', + u'Standards-Version: 3.8.3', + u'Homepage: {upstream_homepage:s}', + u'', + u'Package: python-{project_name:s}', + u'Section: python', + u'Architecture: all', + u'Depends: {depends:s}', + u'Description: {description_short:s}', + u' {description_long:s}', + u'']) + + _COPYRIGHT_TEMPLATE = u'\n'.join([ + u'']) + + _RULES_TEMPLATE = u'\n'.join([ + u'#!/usr/bin/make -f', + u'# debian/rules that uses debhelper >= 7.', + u'', + u'# Uncomment this to turn on verbose mode.', + u'#export DH_VERBOSE=1', + u'', + u'# This has to be exported to make some magic below work.', + u'export DH_OPTIONS', + u'', + u'', + u'%:', + u' dh $@', + u'', + u'override_dh_auto_clean:', + u'', + u'override_dh_auto_test:', + u'', + u'override_dh_installmenu:', + u'', + u'override_dh_installmime:', + u'', + u'override_dh_installmodules:', + u'', + u'override_dh_installlogcheck:', + u'', + u'override_dh_installlogrotate:', + u'', + u'override_dh_installpam:', + u'', + u'override_dh_installppp:', + u'', + u'override_dh_installudev:', + u'', + u'override_dh_installwm:', + u'', + u'override_dh_installxfonts:', + u'', + u'override_dh_gconf:', + u'', + u'override_dh_icons:', + u'', + u'override_dh_perl:', + u'', + u'override_dh_pysupport:', + u'']) + + def __init__( + self, project_name, project_version, dependency_definition): + """Initializes the dpkg build files generator. + + Args: + project_name: the name of the project. + project_version: the version of the project. + dependency_definition: the dependency definition object (instance of + DependencyDefinition). 
+ """ + super(PythonModuleDpkgBuildFilesGenerator, self).__init__() + self._project_name = project_name + self._project_version = project_version + self._dependency_definition = dependency_definition + + def _GenerateChangelogFile(self, dpkg_path): + """Generate the dpkg build changelog file. + + Args: + dpkg_path: the path to the dpkg files. + """ + timezone_minutes, _ = divmod(time.timezone, 60) + timezone_hours, timezone_minutes = divmod(timezone_minutes, 60) + + # If timezone_hours is -1 {0:02d} will format as -1 instead of -01 + # hence we detect the sign and force a leading zero. + if timezone_hours < 0: + timezone_string = u'-{0:02d}{1:02d}'.format( + -timezone_hours, timezone_minutes) + else: + timezone_string = u'+{0:02d}{1:02d}'.format( + timezone_hours, timezone_minutes) + + date_time_string = u'{0:s} {1:s}'.format( + time.strftime('%a, %d %b %Y %H:%M:%S'), timezone_string) + + if self._dependency_definition.dpkg_name: + project_name = self._dependency_definition.dpkg_name + else: + project_name = self._project_name + + template_values = { + 'project_name': project_name, + 'project_version': self._project_version, + 'maintainer_email_address': self._EMAIL_ADDRESS, + 'date_time': date_time_string} + + filename = os.path.join(dpkg_path, u'changelog') + with open(filename, 'wb') as file_object: + data = self._CHANGELOG_TEMPLATE.format(**template_values) + file_object.write(data.encode('utf-8')) + + def _GenerateCompatFile(self, dpkg_path): + """Generate the dpkg build compat file. + + Args: + dpkg_path: the path to the dpkg files. + """ + filename = os.path.join(dpkg_path, u'compat') + with open(filename, 'wb') as file_object: + data = self._COMPAT_TEMPLATE + file_object.write(data.encode('utf-8')) + + def _GenerateControlFile(self, dpkg_path): + """Generate the dpkg build control file. + + Args: + dpkg_path: the path to the dpkg files. + """ + if self._dependency_definition.dpkg_name: + project_name = self._dependency_definition.dpkg_name + else: + project_name = self._project_name + + + depends = [] + if self._dependency_definition.dpkg_dependencies: + depends.append(self._dependency_definition.dpkg_dependencies) + depends.append('${{shlibs:Depends}}') + depends.append('${{python:Depends}}') + depends = u', '.join(depends) + + template_values = { + 'project_name': project_name, + 'upstream_maintainer': self._dependency_definition.maintainer, + 'upstream_homepage': self._dependency_definition.homepage_url, + 'depends': depends, + 'description_short': self._dependency_definition.description_short, + 'description_long': self._dependency_definition.description_long} + + filename = os.path.join(dpkg_path, u'control') + with open(filename, 'wb') as file_object: + data = self._CONTROL_TEMPLATE.format(**template_values) + file_object.write(data.encode('utf-8')) + + def _GenerateCopyrightFile(self, dpkg_path): + """Generate the dpkg build copyright file. + + Args: + dpkg_path: the path to the dpkg files. + """ + license_file = os.path.join( + os.path.dirname(os.path.dirname(__file__)), u'config', u'licenses', + u'LICENSE.{0:s}'.format(self._project_name)) + + filename = os.path.join(dpkg_path, u'copyright') + + shutil.copy(license_file, filename) + + def _GenerateDocsFile(self, dpkg_path): + """Generate the dpkg build .docs file. + + Args: + dpkg_path: the path to the dpkg files. + """ + if self._dependency_definition.dpkg_name: + project_name = self._dependency_definition.dpkg_name + else: + project_name = self._project_name + + # Determine the available doc files. 
+ doc_files = []
+ for filename in self._DOCS_FILENAMES:
+ if os.path.exists(filename):
+ doc_files.append(filename)
+
+ filename = os.path.join(
+ dpkg_path, u'python-{0:s}.docs'.format(project_name))
+ with open(filename, 'wb') as file_object:
+ file_object.write(u'\n'.join(doc_files).encode('utf-8'))
+
+ def _GenerateRulesFile(self, dpkg_path):
+ """Generates the dpkg build rules file.
+
+ Args:
+ dpkg_path: the path to the dpkg files.
+ """
+ filename = os.path.join(dpkg_path, u'rules')
+ with open(filename, 'wb') as file_object:
+ data = self._RULES_TEMPLATE
+ file_object.write(data.encode('utf-8'))
+
+ def GenerateFiles(self, dpkg_path):
+ """Generates the dpkg build files.
+
+ Args:
+ dpkg_path: the path to the dpkg files.
+ """
+ os.mkdir(dpkg_path)
+ self._GenerateChangelogFile(dpkg_path)
+ self._GenerateCompatFile(dpkg_path)
+ self._GenerateControlFile(dpkg_path)
+ self._GenerateCopyrightFile(dpkg_path)
+ self._GenerateDocsFile(dpkg_path)
+ self._GenerateRulesFile(dpkg_path)
+
+
+class BuildHelper(object):
+ """Base class that helps in building."""
+
+ LOG_FILENAME = u'build.log'
+
+ def __init__(self, dependency_definition):
+ """Initializes the build helper.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+ """
+ super(BuildHelper, self).__init__()
+ self._dependency_definition = dependency_definition
+
+
+class DpkgBuildHelper(BuildHelper):
+ """Class that helps in building dpkg packages (.deb)."""
+
+ # TODO: determine BUILD_DEPENDENCIES from the build files?
+ # TODO: what about flex, byacc?
+ _BUILD_DEPENDENCIES = frozenset([
+ 'git',
+ 'build-essential',
+ 'autotools-dev',
+ 'autoconf',
+ 'automake',
+ 'autopoint',
+ 'libtool',
+ 'gettext',
+ 'debhelper',
+ 'fakeroot',
+ 'quilt',
+ 'zlib1g-dev',
+ 'libbz2-dev',
+ 'libssl-dev',
+ 'libfuse-dev',
+ 'python-dev',
+ 'python-setuptools',
+ 'libsqlite3-dev',
+ ])
+
+ def _BuildPrepare(self, source_directory):
+ """Makes the necessary preparations before building the dpkg packages.
+
+ Args:
+ source_directory: the name of the source directory.
+
+ Returns:
+ True if the preparations were successful, False otherwise.
+ """
+ # Script to run before building, e.g. to change the dpkg build files.
+ if os.path.exists(u'prep-dpkg.sh'):
+ command = u'sh ../prep-dpkg.sh'
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ return True
+
+ def _BuildFinalize(self, source_directory):
+ """Makes the necessary finalizations after building the dpkg packages.
+
+ Args:
+ source_directory: the name of the source directory.
+
+ Returns:
+ True if the finalizations were successful, False otherwise.
+ """
+ # Script to run after building, e.g. to automatically upload
+ # the dpkg package files to an apt repository.
+ if os.path.exists(u'post-dpkg.sh'):
+ command = u'sh ../post-dpkg.sh'
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ return True
+
+ @classmethod
+ def CheckBuildDependencies(cls):
+ """Checks if the build dependencies are met.
+
+ Returns:
+ A list of package names that need to be installed or an empty list.
+ """
+ missing_packages = []
+ for package_name in cls._BUILD_DEPENDENCIES:
+ if not cls.CheckIsInstalled(package_name):
+ missing_packages.append(package_name)
+
+ return missing_packages
+
+ @classmethod
+ def CheckIsInstalled(cls, package_name):
+ """Checks if a package is installed.
+
+ Args:
+ package_name: the name of the package.
+
+ Returns:
+ A boolean value containing True if the package is installed,
+ False otherwise.
+ """
+ command = u'dpkg-query -l {0:s} >/dev/null 2>&1'.format(package_name)
+ exit_code = subprocess.call(command, shell=True)
+ return exit_code == 0
+
+
+class LibyalDpkgBuildHelper(DpkgBuildHelper):
+ """Class that helps in building libyal dpkg packages (.deb)."""
+
+ def __init__(self, dependency_definition):
+ """Initializes the build helper.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+ """
+ super(LibyalDpkgBuildHelper, self).__init__(dependency_definition)
+ self.architecture = platform.machine()
+
+ if self.architecture == 'i686':
+ self.architecture = 'i386'
+ elif self.architecture == 'x86_64':
+ self.architecture = 'amd64'
+
+ def Build(self, source_helper):
+ """Builds the dpkg packages.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ source_filename = source_helper.Download()
+ logging.info(u'Building deb of: {0:s}'.format(source_filename))
+
+ source_directory = source_helper.Create()
+ if not source_directory:
+ logging.error(
+ u'Extraction of source package: {0:s} failed'.format(source_filename))
+ return False
+
+ dpkg_directory = os.path.join(source_directory, u'dpkg')
+ if not os.path.exists(dpkg_directory):
+ dpkg_directory = os.path.join(source_directory, u'config', u'dpkg')
+
+ if not os.path.exists(dpkg_directory):
+ logging.error(u'Missing dpkg sub directory in: {0:s}'.format(
+ source_directory))
+ return False
+
+ debian_directory = os.path.join(source_directory, u'debian')
+
+ # If there is a debian directory remove it and recreate it from
+ # the dpkg directory.
+ if os.path.exists(debian_directory):
+ logging.info(u'Removing: {0:s}'.format(debian_directory))
+ shutil.rmtree(debian_directory)
+ shutil.copytree(dpkg_directory, debian_directory)
+
+ if not self._BuildPrepare(source_directory):
+ return False
+
+ command = u'dpkg-buildpackage -uc -us -rfakeroot > {0:s} 2>&1'.format(
+ os.path.join(u'..', self.LOG_FILENAME))
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ if not self._BuildFinalize(source_directory):
+ return False
+
+ return True
+
+ def Clean(self, source_helper):
+ """Cleans the dpkg packages in the current directory.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+ """ + filenames_to_ignore = re.compile(u'^{0:s}[-_].*{1!s}'.format( + source_helper.project_name, source_helper.project_version)) + + # Remove files of previous versions in the format: + # library[-_]version-1_architecture.* + filenames = glob.glob( + u'{0:s}[-_]*[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-1_' + u'{1:s}.*'.format(source_helper.project_name, self.architecture)) + + for filename in filenames: + if not filenames_to_ignore.match(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + os.remove(filename) + + # Remove files of previous versions in the format: + # library[-_]*version-1.* + filenames = glob.glob( + u'{0:s}[-_]*[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-1.*'.format( + source_helper.project_name)) + + for filename in filenames: + if not filenames_to_ignore.match(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + os.remove(filename) + + def GetOutputFilename(self, source_helper): + """Retrieves the filename of one of the resulting files. + + Args: + source_helper: the source helper (instance of SourceHelper). + + Returns: + A filename of one of the resulting dpkg packages. + """ + return u'{0:s}_{1!s}-1_{2:s}.deb'.format( + source_helper.project_name, source_helper.project_version, + self.architecture) + + +class PythonModuleDpkgBuildHelper(DpkgBuildHelper): + """Class that helps in building python module dpkg packages (.deb).""" + + def Build(self, source_helper): + """Builds the dpkg packages. + + Args: + source_helper: the source helper (instance of SourceHelper). + + Returns: + True if the build was successful, False otherwise. + """ + source_filename = source_helper.Download() + logging.info(u'Building deb of: {0:s}'.format(source_filename)) + + source_directory = source_helper.Create() + if not source_directory: + logging.error( + u'Extraction of source package: {0:s} failed'.format(source_filename)) + return False + + dpkg_directory = os.path.join(source_directory, u'dpkg') + if not os.path.exists(dpkg_directory): + dpkg_directory = os.path.join(source_directory, u'config', u'dpkg') + + if not os.path.exists(dpkg_directory): + # Generate the dpkg build files if necessary. + os.chdir(source_directory) + + build_files_generator = PythonModuleDpkgBuildFilesGenerator( + source_helper.project_name, source_helper.project_version, + self._dependency_definition) + build_files_generator.GenerateFiles(u'dpkg') + + os.chdir(u'..') + + dpkg_directory = os.path.join(source_directory, u'dpkg') + + if not os.path.exists(dpkg_directory): + logging.error(u'Missing dpkg sub directory in: {0:s}'.format( + source_directory)) + return False + + debian_directory = os.path.join(source_directory, u'debian') + + # If there is a debian directory remove it and recreate it from + # the dpkg directory. + if os.path.exists(debian_directory): + logging.info(u'Removing: {0:s}'.format(debian_directory)) + shutil.rmtree(debian_directory) + shutil.copytree(dpkg_directory, debian_directory) + + if not self._BuildPrepare(source_directory): + return False + + command = u'dpkg-buildpackage -uc -us -rfakeroot > {0:s} 2>&1'.format( + os.path.join(u'..', self.LOG_FILENAME)) + exit_code = subprocess.call( + u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True) + if exit_code != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + return False + + if not self._BuildFinalize(source_directory): + return False + + return True + + def Clean(self, source_helper): + """Cleans the dpkg packages in the current directory. 
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+ """
+ filenames_to_ignore = re.compile(u'^python-{0:s}[-_].*{1!s}'.format(
+ source_helper.project_name, source_helper.project_version))
+
+ # Remove files of previous versions in the format:
+ # python-{project name}[-_]{project version}-1_architecture.*
+ filenames = glob.glob(
+ u'python-{0:s}[-_]*-1_all.*'.format(source_helper.project_name))
+
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ # Remove files of previous versions in the format:
+ # python-{project name}[-_]*version-1.*
+ filenames = glob.glob(
+ u'python-{0:s}[-_]*-1.*'.format(source_helper.project_name))
+
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ def GetOutputFilename(self, source_helper):
+ """Retrieves the filename of one of the resulting files.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ A filename of one of the resulting dpkg packages.
+ """
+ if self._dependency_definition.dpkg_name:
+ project_name = self._dependency_definition.dpkg_name
+ else:
+ project_name = source_helper.project_name
+
+ return u'python-{0:s}_{1!s}-1_all.deb'.format(
+ project_name, source_helper.project_version)
+
+
+class MsiBuildHelper(BuildHelper):
+ """Class that helps in building Microsoft Installer packages (.msi)."""
+
+ def __init__(self, dependency_definition):
+ """Initializes the build helper.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+ """
+ super(MsiBuildHelper, self).__init__(dependency_definition)
+ self.architecture = platform.machine()
+
+ if self.architecture == 'x86':
+ self.architecture = 'win32'
+ elif self.architecture == 'AMD64':
+ self.architecture = 'win-amd64'
+
+
+class LibyalMsiBuildHelper(MsiBuildHelper):
+ """Class that helps in building Microsoft Installer packages (.msi)."""
+
+ def __init__(self, dependency_definition):
+ """Initializes the build helper.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+
+ Raises:
+ RuntimeError: if the Visual Studio version could not be determined or
+ msvscpp-convert.py could not be found.
+ """
+ super(LibyalMsiBuildHelper, self).__init__(dependency_definition)
+
+ # Note: os.environ.get() is used here since a direct lookup raises
+ # KeyError when the environment variable is not set.
+ if os.environ.get('VS90COMNTOOLS'):
+ self.version = '2008'
+
+ elif os.environ.get('VS100COMNTOOLS'):
+ self.version = '2010'
+
+ elif os.environ.get('VS110COMNTOOLS'):
+ self.version = '2012'
+
+ elif os.environ.get('VS120COMNTOOLS'):
+ self.version = '2013'
+
+ else:
+ raise RuntimeError(u'Unable to determine Visual Studio version.')
+
+ if self.version != '2008':
+ self._msvscpp_convert = os.path.join(
+ os.path.dirname(__file__), u'msvscpp-convert.py')
+
+ if not os.path.exists(self._msvscpp_convert):
+ raise RuntimeError(u'Unable to find msvscpp-convert.py')
+
+ def _BuildPrepare(self, source_directory):
+ """Prepares the source for building with Visual Studio.
+
+ Args:
+ source_directory: the name of the source directory.
+ """
+ # For the vs2008 build make sure the binary is XP compatible,
+ # by setting WINVER to 0x0501. For the vs2010 build WINVER is
+ # set to 0x0600 (Windows Vista).
+
+ # WINVER is set in common\config_winapi.h or common\config_msc.h.
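+ # Illustration (hypothetical header content, the guard define name is
+ # assumed): after this method runs the configuration file contains a
+ # WINVER define directly after the guard define, e.g. for a vs2008 build:
+ #   #define _CONFIG_WINAPI_H
+ #   #define WINVER 0x0501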
+ config_filename = os.path.join( + source_directory, u'common', u'config_winapi.h') + + # If the WINAPI configuration file is not available use + # the MSC compiler configuration file instead. + if not os.path.exists(config_filename): + config_filename = os.path.join( + source_directory, u'common', u'config_msc.h') + + # Add a line to the config file that sets WINVER. + parsing_mode = 0 + + for line in fileinput.input(config_filename, inplace=1): + # Remove trailing whitespace and end-of-line characters. + line = line.rstrip() + + if parsing_mode != 2 or line: + if parsing_mode == 1: + if self.version == '2008': + if not line.startswith('#define WINVER 0x0501'): + print '#define WINVER 0x0501' + print '' + + else: + if not line.startswith('#define WINVER 0x0600'): + print '#define WINVER 0x0600' + print '' + + parsing_mode = 2 + + elif line.startswith('#define _CONFIG_'): + parsing_mode = 1 + + print line + + def _ConvertSolutionFiles(self, source_directory): + """Converts the Visual Studio solution and project files. + + Args: + source_directory: the name of the source directory. + """ + os.chdir(source_directory) + + solution_filenames = glob.glob(os.path.join(u'msvscpp', u'*.sln')) + if len(solution_filenames) != 1: + logging.error(u'Unable to find Visual Studio solution file') + return False + + solution_filename = solution_filenames[0] + + if not os.path.exists(u'vs2008'): + command = u'{0:s} {1:s} --to {2:s} {3:s}'.format( + sys.executable, self._msvscpp_convert, self.version, + solution_filename) + exit_code = subprocess.call(command, shell=False) + if exit_code != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + return False + + # Note that setup.py needs the Visual Studio solution directory + # to be named: msvscpp. So replace the Visual Studio 2008 msvscpp + # solution directory with the converted one. + os.rename(u'msvscpp', u'vs2008') + os.rename(u'vs{0:s}'.format(self.version), u'msvscpp') + + os.chdir(u'..') + + def Build(self, source_helper): + """Builds using Visual Studio. + + Args: + source_helper: the source helper (instance of SourceHelper). + + Returns: + True if the build was successful, False otherwise. + """ + source_filename = source_helper.Download() + logging.info(u'Building: {0:s} with Visual Studio {1:s}'.format( + source_filename, self.version)) + + source_directory = source_helper.Create() + if not source_directory: + logging.error( + u'Extraction of source package: {0:s} failed'.format(source_filename)) + return False + + # Search common locations for MSBuild.exe + if self.version == '2008': + msbuild = u'{0:s}:{1:s}{2:s}'.format( + u'C', os.sep, os.path.join( + u'Windows', u'Microsoft.NET', u'Framework', u'v3.5', + u'MSBuild.exe')) + + # Note that MSBuild in .NET 3.5 does not support vs2010 solution files + # and MSBuild in .NET 4.0 is needed instead. 
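+ # Assumed default install locations checked here:
+ #   vs2008: C:\Windows\Microsoft.NET\Framework\v3.5\MSBuild.exe
+ #   vs2010 and later:
+ #   C:\Windows\Microsoft.NET\Framework\v4.0.30319\MSBuild.exe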
+ elif self.version in ['2010', '2012', '2013']:
+ msbuild = u'{0:s}:{1:s}{2:s}'.format(
+ u'C', os.sep, os.path.join(
+ u'Windows', u'Microsoft.NET', u'Framework', u'v4.0.30319',
+ u'MSBuild.exe'))
+
+ if not os.path.exists(msbuild):
+ logging.error(u'Unable to find MSBuild.exe')
+ return False
+
+ if self.version == '2008':
+ if not os.environ.get('VS90COMNTOOLS'):
+ logging.error(u'Missing VS90COMNTOOLS environment variable.')
+ return False
+
+ elif self.version == '2010':
+ if not os.environ.get('VS100COMNTOOLS'):
+ logging.error(u'Missing VS100COMNTOOLS environment variable.')
+ return False
+
+ elif self.version == '2012':
+ if not os.environ.get('VS110COMNTOOLS'):
+ logging.error(u'Missing VS110COMNTOOLS environment variable.')
+ return False
+
+ elif self.version == '2013':
+ if not os.environ.get('VS120COMNTOOLS'):
+ logging.error(u'Missing VS120COMNTOOLS environment variable.')
+ return False
+
+ # For Visual Studio versions later than 2008 the 2008 solution and
+ # project files need to be converted to the newer version.
+ if self.version in ['2010', '2012', '2013']:
+ self._ConvertSolutionFiles(source_directory)
+
+ self._BuildPrepare(source_directory)
+
+ # Detect the architecture based on the Visual Studio Platform environment
+ # variable. If not set the platform will default to Win32.
+ msvscpp_platform = os.environ.get('Platform', None)
+ if not msvscpp_platform:
+ msvscpp_platform = os.environ.get('TARGET_CPU', None)
+
+ if not msvscpp_platform or msvscpp_platform == 'x86':
+ msvscpp_platform = 'Win32'
+
+ if msvscpp_platform not in ['Win32', 'x64']:
+ logging.error(u'Unsupported build platform: {0:s}'.format(
+ msvscpp_platform))
+ return False
+
+ if self.version == '2008' and msvscpp_platform == 'x64':
+ logging.error(u'Unsupported 64-bit build platform for vs2008.')
+ return False
+
+ solution_filenames = glob.glob(os.path.join(
+ source_directory, u'msvscpp', u'*.sln'))
+ if len(solution_filenames) != 1:
+ logging.error(u'Unable to find Visual Studio solution file')
+ return False
+
+ solution_filename = solution_filenames[0]
+
+ command = (
+ u'{0:s} /p:Configuration=Release /p:Platform={1:s} /noconsolelogger '
+ u'/fileLogger /maxcpucount {2:s}').format(
+ msbuild, msvscpp_platform, solution_filename)
+ exit_code = subprocess.call(command, shell=False)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ python_module_name, _, _ = source_directory.partition(u'-')
+ python_module_name = u'py{0:s}'.format(python_module_name[3:])
+ python_module_directory = os.path.join(
+ source_directory, python_module_name)
+ python_module_dist_directory = os.path.join(
+ python_module_directory, u'dist')
+
+ if not os.path.exists(python_module_dist_directory):
+ build_directory = os.path.join(u'..', u'..')
+
+ os.chdir(python_module_directory)
+
+ # Setup.py uses VS90COMNTOOLS which is vs2008 specific
+ # so we need to set it for the other Visual Studio versions.
+ if self.version == '2010':
+ os.environ['VS90COMNTOOLS'] = os.environ['VS100COMNTOOLS']
+
+ elif self.version == '2012':
+ os.environ['VS90COMNTOOLS'] = os.environ['VS110COMNTOOLS']
+
+ elif self.version == '2013':
+ os.environ['VS90COMNTOOLS'] = os.environ['VS120COMNTOOLS']
+
+ command = u'{0:s} setup.py bdist_msi'.format(sys.executable)
+ exit_code = subprocess.call(command, shell=False)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ # Move the msi to the build directory.
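+ # Illustration (hypothetical version number): for pylnk the glob below
+ # matches e.g. dist/pylnk-20141026.1.win32-py2.7.msi.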
+ msi_filename = glob.glob(os.path.join( + u'dist', u'{0:s}-*.msi'.format(python_module_name))) + + logging.info(u'Moving: {0:s}'.format(msi_filename[0])) + shutil.move(msi_filename[0], build_directory) + + os.chdir(build_directory) + + return True + + def Clean(self, source_helper): + """Cleans the build and dist directory. + + Args: + source_helper: the source helper (instance of SourceHelper). + """ + # Remove previous versions of msis. + filenames_to_ignore = re.compile(u'{0:s}-.*{1!s}.1.{2:s}-py2.7.msi'.format( + source_helper.project_name, source_helper.project_version, + self.architecture)) + + msi_filenames_glob = u'{0:s}-*.1.{1:s}-py2.7.msi'.format( + source_helper.project_name, self.architecture) + + filenames = glob.glob(msi_filenames_glob) + for filename in filenames: + if not filenames_to_ignore.match(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + os.remove(filename) + + def GetOutputFilename(self, source_helper): + """Retrieves the filename of one of the resulting files. + + Args: + source_helper: the source helper (instance of SourceHelper). + + Returns: + A filename of one of the resulting msis. + """ + return u'{0:s}-{1!s}.1.{2:s}-py2.7.msi'.format( + source_helper.project_name, source_helper.project_version, + self.architecture) + + +class PythonModuleMsiBuildHelper(MsiBuildHelper): + """Class that helps in building Microsoft Installer packages (.msi).""" + + def Build(self, source_helper): + """Builds the msi. + + Args: + source_helper: the source helper (instance of SourceHelper). + + Returns: + True if the build was successful, False otherwise. + """ + source_filename = source_helper.Download() + logging.info(u'Building msi of: {0:s}'.format(source_filename)) + + source_directory = source_helper.Create() + if not source_directory: + logging.error( + u'Extraction of source package: {0:s} failed'.format(source_filename)) + return False + + command = u'{0:s} setup.py bdist_msi > {1:s} 2>&1'.format( + sys.executable, os.path.join(u'..', self.LOG_FILENAME)) + exit_code = subprocess.call( + u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True) + if exit_code != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + return False + + # Move the msi to the build directory. + msi_filename = glob.glob(os.path.join( + source_directory, u'dist', u'{0:s}-*.msi'.format( + source_helper.project_name))) + + logging.info(u'Moving: {0:s}'.format(msi_filename[0])) + shutil.move(msi_filename[0], '.') + + return True + + def Clean(self, source_helper): + """Cleans the build and dist directory. + + Args: + source_helper: the source helper (instance of SourceHelper). + """ + # Remove previous versions build directories. + for filename in [u'build', u'dist']: + if os.path.exists(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + shutil.rmtree(filename, True) + + # Remove previous versions of msis. + filenames_to_ignore = re.compile(u'{0:s}-.*{1!s}.{2:s}.msi'.format( + source_helper.project_name, source_helper.project_version, + self.architecture)) + + msi_filenames_glob = u'{0:s}-*.{1:s}.msi'.format( + source_helper.project_name, self.architecture) + + filenames = glob.glob(msi_filenames_glob) + for filename in filenames: + if not filenames_to_ignore.match(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + os.remove(filename) + + def GetOutputFilename(self, source_helper): + """Retrieves the filename of one of the resulting files. + + Args: + source_helper: the source helper (instance of SourceHelper). 
+
+ Returns:
+ A filename of one of the resulting msis.
+ """
+ # TODO: this does not work for dfvfs at the moment. Fix this.
+ return u'{0:s}-{1!s}.{2:s}.msi'.format(
+ source_helper.project_name, source_helper.project_version,
+ self.architecture)
+
+
+class PkgBuildHelper(BuildHelper):
+ """Class that helps in building MacOS-X packages (.pkg)."""
+
+ def __init__(self, dependency_definition):
+ """Initializes the build helper.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+ """
+ super(PkgBuildHelper, self).__init__(dependency_definition)
+ self._pkgbuild = os.path.join(u'/', u'usr', u'bin', u'pkgbuild')
+
+ def _BuildDmg(self, pkg_filename, dmg_filename):
+ """Builds the distributable disk image (.dmg) from the pkg.
+
+ Args:
+ pkg_filename: the name of the pkg file (which is technically
+ a directory).
+ dmg_filename: the name of the dmg file.
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ command = (
+ u'hdiutil create {0:s} -srcfolder {1:s} -fs HFS+').format(
+ dmg_filename, pkg_filename)
+ exit_code = subprocess.call(command, shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ return True
+
+ def _BuildPkg(
+ self, source_directory, project_identifier, project_version,
+ pkg_filename):
+ """Builds the pkg from the installed source files.
+
+ Args:
+ source_directory: the name of the source directory.
+ project_identifier: the project identifier.
+ project_version: the version of the project.
+ pkg_filename: the name of the pkg file (which is technically
+ a directory).
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ command = (
+ u'{0:s} --root {1:s}/tmp/ --identifier {2:s} '
+ u'--version {3!s} --ownership recommended {4:s}').format(
+ self._pkgbuild, source_directory, project_identifier,
+ project_version, pkg_filename)
+ exit_code = subprocess.call(command, shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ return True
+
+ def Clean(self, source_helper):
+ """Cleans the MacOS-X packages in the current directory.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+ """
+ filenames_to_ignore = re.compile(u'^{0:s}-.*{1!s}'.format(
+ source_helper.project_name, source_helper.project_version))
+
+ # Remove files of previous versions in the format:
+ # project-*version.dmg
+ filenames = glob.glob(u'{0:s}-*.dmg'.format(source_helper.project_name))
+
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ # Remove files of previous versions in the format:
+ # project-*version.pkg
+ filenames = glob.glob(u'{0:s}-*.pkg'.format(source_helper.project_name))
+
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ def GetOutputFilename(self, source_helper):
+ """Retrieves the filename of one of the resulting files.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ A filename of one of the resulting dmgs.
+ """
+ return u'{0:s}-{1!s}.dmg'.format(
+ source_helper.project_name, source_helper.project_version)
+
+
+class LibyalPkgBuildHelper(PkgBuildHelper):
+ """Class that helps in building MacOS-X packages (.pkg)."""
+
+ def Build(self, source_helper):
+ """Builds the pkg package and distributable disk image (.dmg).
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ source_filename = source_helper.Download()
+ logging.info(u'Building pkg of: {0:s}'.format(source_filename))
+
+ source_directory = source_helper.Create()
+ if not source_directory:
+ logging.error(
+ u'Extraction of source package: {0:s} failed'.format(source_filename))
+ return False
+
+ dmg_filename = u'{0:s}-{1!s}.dmg'.format(
+ source_helper.project_name, source_helper.project_version)
+ pkg_filename = u'{0:s}-{1!s}.pkg'.format(
+ source_helper.project_name, source_helper.project_version)
+ log_filename = os.path.join(u'..', self.LOG_FILENAME)
+
+ sdks_path = os.path.join(
+ u'/', u'Applications', u'Xcode.app', u'Contents', u'Developer',
+ u'Platforms', u'MacOSX.platform', u'Developer', u'SDKs')
+
+ sdk_path = None
+ for sub_path in [u'MacOSX10.7.sdk', u'MacOSX10.8.sdk', u'MacOSX10.9.sdk']:
+ if os.path.isdir(os.path.join(sdks_path, sub_path)):
+ sdk_path = os.path.join(sdks_path, sub_path)
+ break
+
+ if sdk_path:
+ cflags = u'CFLAGS="-isysroot {0:s}"'.format(sdk_path)
+ ldflags = u'LDFLAGS="-Wl,-syslibroot,{0:s}"'.format(sdk_path)
+ else:
+ cflags = u''
+ ldflags = u''
+
+ if not os.path.exists(pkg_filename):
+ if cflags and ldflags:
+ command = (
+ u'{0:s} {1:s} ./configure --prefix=/usr --enable-python '
+ u'--with-pyprefix --disable-dependency-tracking > {2:s} '
+ u'2>&1').format(cflags, ldflags, log_filename)
+ else:
+ command = (
+ u'./configure --prefix=/usr --enable-python --with-pyprefix '
+ u'> {0:s} 2>&1').format(log_filename)
+
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ command = u'make >> {0:s} 2>&1'.format(log_filename)
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ command = u'make install DESTDIR={0:s}/tmp >> {1:s} 2>&1'.format(
+ os.path.abspath(source_directory), log_filename)
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ share_doc_path = os.path.join(
+ source_directory, u'tmp', u'usr', u'share', u'doc',
+ source_helper.project_name)
+ if not os.path.exists(share_doc_path):
+ os.makedirs(share_doc_path)
+
+ shutil.copy(os.path.join(source_directory, u'AUTHORS'), share_doc_path)
+ shutil.copy(os.path.join(source_directory, u'COPYING'), share_doc_path)
+ shutil.copy(os.path.join(source_directory, u'NEWS'), share_doc_path)
+ shutil.copy(os.path.join(source_directory, u'README'), share_doc_path)
+
+ project_identifier = u'com.github.libyal.{0:s}'.format(
+ source_helper.project_name)
+ if not self._BuildPkg(
+ source_directory, project_identifier, source_helper.project_version,
+ pkg_filename):
+ return False
+
+ if not self._BuildDmg(pkg_filename, dmg_filename):
+ return False
+
+ return True
+
+
+class PythonModulePkgBuildHelper(PkgBuildHelper):
+ """Class that helps in building MacOS-X packages (.pkg)."""
+
+ def Build(self, source_helper):
+ """Builds the pkg package and distributable disk image (.dmg).
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ source_filename = source_helper.Download()
+ logging.info(u'Building pkg of: {0:s}'.format(source_filename))
+
+ source_directory = source_helper.Create()
+ if not source_directory:
+ logging.error(
+ u'Extraction of source package: {0:s} failed'.format(source_filename))
+ return False
+
+ dmg_filename = u'{0:s}-{1!s}.dmg'.format(
+ source_helper.project_name, source_helper.project_version)
+ pkg_filename = u'{0:s}-{1!s}.pkg'.format(
+ source_helper.project_name, source_helper.project_version)
+ log_filename = os.path.join(u'..', self.LOG_FILENAME)
+
+ if not os.path.exists(pkg_filename):
+ command = u'python setup.py build > {0:s} 2>&1'.format(log_filename)
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ command = u'python setup.py install --root={0:s}/tmp > {1:s} 2>&1'.format(
+ os.path.abspath(source_directory), log_filename)
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ # Copy the license file to the egg-info sub directory.
+ for license_file in [
+ u'COPYING', u'LICENSE', u'LICENSE.TXT', u'LICENSE.txt']:
+ if not os.path.exists(os.path.join(source_directory, license_file)):
+ continue
+
+ command = (
+ u'find ./tmp -type d -name \\*.egg-info -exec cp {0:s} {{}} '
+ u'\\;').format(license_file)
+ exit_code = subprocess.call(
+ u'(cd {0:s} && {1:s})'.format(source_directory, command),
+ shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ project_identifier = source_helper.GetProjectIdentifier()
+ if not self._BuildPkg(
+ source_directory, project_identifier, source_helper.project_version,
+ pkg_filename):
+ return False
+
+ if not self._BuildDmg(pkg_filename, dmg_filename):
+ return False
+
+ return True
+
+
+class RpmBuildHelper(BuildHelper):
+ """Class that helps in building rpm packages (.rpm)."""
+
+ # TODO: determine BUILD_DEPENDENCIES from the build files?
+ _BUILD_DEPENDENCIES = frozenset([
+ 'git',
+ 'binutils',
+ 'autoconf',
+ 'automake',
+ 'libtool',
+ 'gettext-devel',
+ 'make',
+ 'pkgconfig',
+ 'gcc',
+ 'gcc-c++',
+ 'flex',
+ 'byacc',
+ 'zlib-devel',
+ 'bzip2-devel',
+ 'openssl-devel',
+ 'fuse-devel',
+ 'rpm-build',
+ 'python-devel',
+ 'python-dateutil',
+ 'python-setuptools',
+ 'sqlite-devel',
+ ])
+
+ def __init__(self, dependency_definition):
+ """Initializes the build helper.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+ """
+ super(RpmBuildHelper, self).__init__(dependency_definition)
+ self.architecture = platform.machine()
+
+ self.rpmbuild_path = os.path.join(u'~', u'rpmbuild')
+ self.rpmbuild_path = os.path.expanduser(self.rpmbuild_path)
+
+ self._rpmbuild_rpms_path = os.path.join(
+ self.rpmbuild_path, u'RPMS', self.architecture)
+ self._rpmbuild_sources_path = os.path.join(self.rpmbuild_path, u'SOURCES')
+ self._rpmbuild_specs_path = os.path.join(self.rpmbuild_path, u'SPECS')
+
+ def _BuildFromSpecFile(self, spec_filename):
+ """Builds the rpms directly from a spec file.
+
+ Args:
+ spec_filename: the name of the spec file as stored in the rpmbuild
+ SPECS sub directory.
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ current_path = os.getcwd()
+ os.chdir(self.rpmbuild_path)
+
+ command = u'rpmbuild -ba {0:s} > {1:s} 2>&1'.format(
+ os.path.join(u'SPECS', spec_filename), self.LOG_FILENAME)
+ exit_code = subprocess.call(command, shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+
+ os.chdir(current_path)
+
+ return exit_code == 0
+
+ def _BuildFromSourcePackage(self, source_filename):
+ """Builds the rpms directly from the source package file.
+
+ For this to work the source package needs to contain a valid rpm .spec file.
+
+ Args:
+ source_filename: the name of the source package file.
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ command = u'rpmbuild -ta {0:s} > {1:s} 2>&1'.format(
+ source_filename, self.LOG_FILENAME)
+ exit_code = subprocess.call(command, shell=True)
+ if exit_code != 0:
+ logging.error(u'Running: "{0:s}" failed.'.format(command))
+ return False
+
+ return True
+
+ def _CreateRpmbuildDirectories(self):
+ """Creates the rpmbuild and sub directories."""
+ if not os.path.exists(self.rpmbuild_path):
+ os.mkdir(self.rpmbuild_path)
+
+ if not os.path.exists(self._rpmbuild_sources_path):
+ os.mkdir(self._rpmbuild_sources_path)
+
+ if not os.path.exists(self._rpmbuild_specs_path):
+ os.mkdir(self._rpmbuild_specs_path)
+
+ def _CreateSpecFile(self, project_name, spec_file_data):
+ """Creates a spec file in the rpmbuild directory.
+
+ Args:
+ project_name: the name of the project.
+ spec_file_data: the spec file data.
+ """
+ spec_filename = os.path.join(
+ self._rpmbuild_specs_path, u'{0:s}.spec'.format(project_name))
+
+ spec_file = open(spec_filename, 'w')
+ spec_file.write(spec_file_data)
+ spec_file.close()
+
+ def _CopySourceFile(self, source_filename):
+ """Copies the source file to the rpmbuild directory.
+
+ Args:
+ source_filename: the name of the source package file.
+ """
+ shutil.copy(source_filename, self._rpmbuild_sources_path)
+
+ def _MoveRpms(self, project_name, project_version):
+ """Moves the rpms from the rpmbuild directory into the current directory.
+
+ Args:
+ project_name: the name of the project.
+ project_version: the version of the project.
+ """
+ filenames = glob.glob(os.path.join(
+ self._rpmbuild_rpms_path, u'{0:s}-*{1!s}-1.{2:s}.rpm'.format(
+ project_name, project_version, self.architecture)))
+ for filename in filenames:
+ logging.info(u'Moving: {0:s}'.format(filename))
+ shutil.move(filename, '.')
+
+ @classmethod
+ def CheckBuildDependencies(cls):
+ """Checks if the build dependencies are met.
+
+ Returns:
+ A list of package names that need to be installed or an empty list.
+ """
+ missing_packages = []
+ for package_name in cls._BUILD_DEPENDENCIES:
+ if not cls.CheckIsInstalled(package_name):
+ missing_packages.append(package_name)
+
+ return missing_packages
+
+ @classmethod
+ def CheckIsInstalled(cls, package_name):
+ """Checks if a package is installed.
+
+ Args:
+ package_name: the name of the package.
+
+ Returns:
+ A boolean value containing True if the package is installed,
+ False otherwise.
+ """
+ command = u'rpm -qi {0:s} >/dev/null 2>&1'.format(package_name)
+ exit_code = subprocess.call(command, shell=True)
+ return exit_code == 0
+
+ def Clean(self, source_helper):
+ """Cleans the rpmbuild directory.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+ """
+ # Remove previous versions build directories.
+ filenames_to_ignore = re.compile(u'{0:s}-{1!s}'.format(
+ source_helper.project_name, source_helper.project_version))
+
+ filenames = glob.glob(os.path.join(
+ self.rpmbuild_path, u'BUILD', u'{0:s}-*'.format(
+ source_helper.project_name)))
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ shutil.rmtree(filename)
+
+ # Remove previous versions of rpms.
+ filenames_to_ignore = re.compile(
+ u'{0:s}-.*{1!s}-1.{2:s}.rpm'.format(
+ source_helper.project_name, source_helper.project_version,
+ self.architecture))
+
+ rpm_filenames_glob = u'{0:s}-*-1.{1:s}.rpm'.format(
+ source_helper.project_name, self.architecture)
+
+ filenames = glob.glob(rpm_filenames_glob)
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ filenames = glob.glob(os.path.join(
+ self.rpmbuild_path, u'RPMS', self.architecture, rpm_filenames_glob))
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ # Remove previous versions of source rpms.
+ filenames_to_ignore = re.compile(u'{0:s}-.*{1!s}-1.src.rpm'.format(
+ source_helper.project_name, source_helper.project_version))
+
+ filenames = glob.glob(os.path.join(
+ self.rpmbuild_path, u'SRPMS',
+ u'{0:s}-*-1.src.rpm'.format(source_helper.project_name)))
+ for filename in filenames:
+ if not filenames_to_ignore.match(filename):
+ logging.info(u'Removing: {0:s}'.format(filename))
+ os.remove(filename)
+
+ def GetOutputFilename(self, source_helper):
+ """Retrieves the filename of one of the resulting files.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ A filename of one of the resulting rpms.
+ """
+ return u'{0:s}-{1!s}-1.{2:s}.rpm'.format(
+ source_helper.project_name, source_helper.project_version,
+ self.architecture)
+
+
+class LibyalRpmBuildHelper(RpmBuildHelper):
+ """Class that helps in building libyal rpm packages (.rpm)."""
+
+ def Build(self, source_helper):
+ """Builds the rpms.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+
+ Returns:
+ True if the build was successful, False otherwise.
+ """
+ source_filename = source_helper.Download()
+ logging.info(u'Building rpm of: {0:s}'.format(source_filename))
+
+ # rpmbuild wants the library filename without the status indication.
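+ # Illustration (hypothetical filename): a download named
+ # libexample-alpha-20141130.tar.gz is renamed below to
+ # libexample-20141130.tar.gz before rpmbuild is invoked.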
+ rpm_source_filename = u'{0:s}-{1!s}.tar.gz'.format( + source_helper.project_name, source_helper.project_version) + os.rename(source_filename, rpm_source_filename) + + build_successful = self._BuildFromSourcePackage(rpm_source_filename) + + if build_successful: + # Move the rpms to the build directory. + self._MoveRpms(source_helper.project_name, source_helper.project_version) + + # Remove BUILD directory. + filename = os.path.join( + self.rpmbuild_path, u'BUILD', u'{0:s}-{1!s}'.format( + source_helper.project_name, source_helper.project_version)) + logging.info(u'Removing: {0:s}'.format(filename)) + shutil.rmtree(filename) + + # Remove SRPMS file. + filename = os.path.join( + self.rpmbuild_path, u'SRPMS', u'{0:s}-{1!s}-1.src.rpm'.format( + source_helper.project_name, source_helper.project_version)) + logging.info(u'Removing: {0:s}'.format(filename)) + os.remove(filename) + + # Change the library filename back to the original. + os.rename(rpm_source_filename, source_filename) + + return build_successful + + +class PythonModuleRpmBuildHelper(RpmBuildHelper): + """Class that helps in building rpm packages (.rpm).""" + + def __init__(self, dependency_definition): + """Initializes the build helper. + + Args: + dependency_definition: the dependency definition object (instance of + DependencyDefinition). + """ + super(PythonModuleRpmBuildHelper, self).__init__(dependency_definition) + self.architecture = 'noarch' + + def Build(self, source_helper): + """Builds the rpms. + + Args: + source_helper: the source helper (instance of SourceHelper). + + Returns: + True if the build was successful, False otherwise. + """ + source_filename = source_helper.Download() + logging.info(u'Building rpm of: {0:s}'.format(source_filename)) + + source_directory = source_helper.Create() + if not source_directory: + logging.error( + u'Extraction of source package: {0:s} failed'.format(source_filename)) + return False + + command = u'python setup.py bdist_rpm > {0:s} 2>&1'.format( + os.path.join(u'..', self.LOG_FILENAME)) + exit_code = subprocess.call( + u'(cd {0:s} && {1:s})'.format(source_directory, command), shell=True) + if exit_code != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + return False + + # Move the rpms to the build directory. + filenames = glob.glob(os.path.join( + source_directory, u'dist', u'{0:s}-{1!s}-1.{2:s}.rpm'.format( + source_helper.project_name, source_helper.project_version, + self.architecture))) + for filename in filenames: + logging.info(u'Moving: {0:s}'.format(filename)) + shutil.move(filename, '.') + + return True + + def Clean(self, source_helper): + """Cleans the build and dist directory. + + Args: + source_helper: the source helper (instance of SourceHelper). + """ + # Remove previous versions build directories. + for filename in [u'build', u'dist']: + if os.path.exists(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + shutil.rmtree(filename, True) + + # Remove previous versions of rpms. 
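+ # Illustration (hypothetical versions): for project "example" at version
+ # 1.2 the expression below keeps example-1.2-1.noarch.rpm, while older
+ # files such as example-1.1-1.noarch.rpm match the glob and are removed.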
+ filenames_to_ignore = re.compile(u'{0:s}-.*{1!s}-1.{2:s}.rpm'.format( + source_helper.project_name, source_helper.project_version, + self.architecture)) + + rpm_filenames_glob = u'{0:s}-*-1.{1:s}.rpm'.format( + source_helper.project_name, self.architecture) + + filenames = glob.glob(rpm_filenames_glob) + for filename in filenames: + if not filenames_to_ignore.match(filename): + logging.info(u'Removing: {0:s}'.format(filename)) + os.remove(filename) + + +class DependencyBuilder(object): + """Class that helps in building dependencies.""" + + _LIBYAL_LIBRARIES = frozenset([ + 'libbde', 'libesedb', 'libevt', 'libevtx', 'libewf', 'libfwsi', 'liblnk', + 'libmsiecf', 'libolecf', 'libqcow', 'libregf', 'libsmdev', 'libsmraw', + 'libvhdi', 'libvmdk', 'libvshadow']) + + _PATCHES_URL = ( + u'https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg/' + u'3rd%20party/patches') + + _PYTHON_MODULES = frozenset([ + 'bencode', 'binplist', 'construct', 'dfvfs', 'dpkt', 'pyparsing', + 'pysqlite', 'pytz', 'PyYAML', 'six']) + + def __init__(self, build_target): + """Initializes the dependency builder. + + Args: + build_target: the build target. + """ + super(DependencyBuilder, self).__init__() + self._build_target = build_target + + def _BuildDependency( + self, download_helper, dependency_definition): + """Builds a dependency. + + Args: + download_helper: the download helper (instance of DownloadHelper). + dependency_definition: the dependency definition object (instance of + DependencyDefinition). + + Returns: + True if the build is successful or False on error. + """ + source_helper = SourcePackageHelper( + download_helper, dependency_definition.name) + + source_helper.Clean() + + if self._build_target == 'download': + source_filename = source_helper.Download() + + # If available run the script post-download.sh after download. + if os.path.exists(u'post-download.sh'): + command = u'sh ./post-download.sh {0:s}'.format(source_filename) + exit_code = subprocess.call(command, shell=True) + if exit_code != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + return False + + # TODO + elif dependency_definition.name in self._LIBYAL_LIBRARIES: + if not self._BuildLibyalLibrary(source_helper, dependency_definition): + return False + + elif dependency_definition.name in self._PYTHON_MODULES: + if not self._BuildPythonModule(source_helper, dependency_definition): + return False + + else: + return False + + return True + + def _BuildLibyalLibrary(self, source_helper, dependency_definition): + """Builds a libyal library and its Python module dependency. + + Args: + source_helper: the source helper (instance of SourceHelper). + dependency_definition: the dependency definition object (instance of + DependencyDefinition). + + Returns: + True if the build is successful or False on error. + """ + build_helper = None + if self._build_target == 'dpkg': + build_helper = LibyalDpkgBuildHelper(dependency_definition) + + elif self._build_target in ['msi']: + # TODO: setup dokan and zlib in build directory. 
+ build_helper = LibyalMsiBuildHelper(dependency_definition)
+
+ elif self._build_target == 'pkg':
+ build_helper = LibyalPkgBuildHelper(dependency_definition)
+
+ elif self._build_target == 'rpm':
+ build_helper = LibyalRpmBuildHelper(dependency_definition)
+
+ if not build_helper:
+ return False
+
+ output_filename = build_helper.GetOutputFilename(source_helper)
+
+ build_helper.Clean(source_helper)
+
+ if not os.path.exists(output_filename):
+ if not build_helper.Build(source_helper):
+ logging.warning((
+ u'Build of: {0:s} failed, for more information check '
+ u'{1:s}').format(
+ source_helper.project_name, build_helper.LOG_FILENAME))
+ return False
+
+ if os.path.exists(build_helper.LOG_FILENAME):
+ logging.info(u'Removing: {0:s}'.format(build_helper.LOG_FILENAME))
+ os.remove(build_helper.LOG_FILENAME)
+
+ return True
+
+ def _BuildPythonModule(self, source_helper, dependency_definition):
+ """Builds a Python module dependency.
+
+ Args:
+ source_helper: the source helper (instance of SourceHelper).
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+
+ Returns:
+ True if the build is successful or False on error.
+ """
+ build_helper = None
+ if self._build_target == 'dpkg':
+ build_helper = PythonModuleDpkgBuildHelper(dependency_definition)
+
+ elif self._build_target in ['msi']:
+ # TODO: setup sqlite in build directory.
+ build_helper = PythonModuleMsiBuildHelper(dependency_definition)
+
+ elif self._build_target == 'pkg':
+ build_helper = PythonModulePkgBuildHelper(dependency_definition)
+
+ elif self._build_target == 'rpm':
+ build_helper = PythonModuleRpmBuildHelper(dependency_definition)
+
+ if not build_helper:
+ return False
+
+ output_filename = build_helper.GetOutputFilename(source_helper)
+
+ build_helper.Clean(source_helper)
+
+ if not os.path.exists(output_filename):
+ if not build_helper.Build(source_helper):
+ logging.warning((
+ u'Build of: {0:s} failed, for more information check '
+ u'{1:s}').format(
+ source_helper.project_name, build_helper.LOG_FILENAME))
+ return False
+
+ if os.path.exists(build_helper.LOG_FILENAME):
+ logging.info(u'Removing: {0:s}'.format(build_helper.LOG_FILENAME))
+ os.remove(build_helper.LOG_FILENAME)
+
+ return True
+
+ def Build(self, dependency_definition):
+ """Builds a dependency.
+
+ Args:
+ dependency_definition: the dependency definition object (instance of
+ DependencyDefinition).
+
+ Returns:
+ True if the build is successful or False on error.
+
+ Raises:
+ ValueError: if the project type is unsupported.
+ """
+ download_url = dependency_definition.download_url
+ if download_url.endswith(u'/'):
+ download_url = download_url[:-1]
+
+ # Unify http:// and https:// URLs for the download helper check.
+ if download_url.startswith(u'https://'):
+ download_url = u'http://{0:s}'.format(download_url[8:])
+
+ if (download_url.startswith(u'http://code.google.com/p/') and
+ download_url.endswith(u'/downloads/list')):
+ download_helper = GoogleCodeWikiDownloadHelper()
+
+ elif download_url.startswith(u'http://pypi.python.org/pypi/'):
+ download_helper = PyPiDownloadHelper()
+
+ elif (download_url.startswith(u'http://sourceforge.net/projects/') and
+ download_url.endswith(u'/files')):
+ download_helper = SourceForgeDownloadHelper()
+
+ # TODO: make this a more generic github download helper when
+ # Google Drive support is no longer needed.
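+ # Illustration (hypothetical URLs) of the dispatch after the
+ # normalization above:
+ #   https://pypi.python.org/pypi/construct/ -> PyPiDownloadHelper
+ #   https://github.com/libyal/libevt -> LibyalGitHubDownloadHelper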
+ elif (download_url.startswith(u'http://github.com/libyal/') or
+ download_url.startswith(u'http://googledrive.com/host/')):
+ download_helper = LibyalGitHubDownloadHelper()
+
+ elif download_url.startswith(u'http://github.com/log2timeline/'):
+ download_helper = Log2TimelineGitHubDownloadHelper()
+
+ else:
+ raise ValueError(u'Unsupported downloads URL.')
+
+ return self._BuildDependency(download_helper, dependency_definition)
+
+
+def Main():
+ """The main program function.
+
+ Returns:
+ True if successful or False if not.
+ """
+ build_targets = frozenset(['download', 'dpkg', 'msi', 'pkg', 'rpm'])
+
+ args_parser = argparse.ArgumentParser(description=(
+ 'Downloads and builds the latest versions of plaso dependencies.'))
+
+ args_parser.add_argument(
+ 'build_target', choices=sorted(build_targets), action='store',
+ metavar='BUILD_TARGET', default=None, help='The build target.')
+
+ args_parser.add_argument(
+ '--build-directory', '--build_directory', action='store',
+ metavar='DIRECTORY', dest='build_directory', type=unicode,
+ default=u'dependencies', help=(
+ u'The location of the build directory.'))
+
+ args_parser.add_argument(
+ '-c', '--config', dest='config_file', action='store',
+ metavar='CONFIG_FILE', default=None,
+ help='path of the build configuration file.')
+
+ options = args_parser.parse_args()
+
+ if not options.build_target:
+ print u'Build target missing.'
+ print u''
+ args_parser.print_help()
+ print u''
+ return False
+
+ if options.build_target not in build_targets:
+ print u'Unsupported build target: {0:s}.'.format(options.build_target)
+ print u''
+ args_parser.print_help()
+ print u''
+ return False
+
+ if not options.config_file:
+ options.config_file = os.path.join(
+ os.path.dirname(__file__), 'dependencies.ini')
+
+ if not os.path.exists(options.config_file):
+ print u'No such config file: {0:s}.'.format(options.config_file)
+ print u''
+ return False
+
+ logging.basicConfig(
+ level=logging.INFO, format=u'[%(levelname)s] %(message)s')
+
+ if options.build_target == 'dpkg':
+ missing_packages = DpkgBuildHelper.CheckBuildDependencies()
+ if missing_packages:
+ print (u'Required build package(s) missing. Please install: '
+ u'{0:s}.'.format(u', '.join(missing_packages)))
+ print u''
+ return False
+
+ elif options.build_target == 'rpm':
+ missing_packages = RpmBuildHelper.CheckBuildDependencies()
+ if missing_packages:
+ print (u'Required build package(s) missing. Please install: '
+ u'{0:s}.'.format(u', '.join(missing_packages)))
+ print u''
+ return False
+
+ dependency_builder = DependencyBuilder(options.build_target)
+
+ # TODO: allow for patching e.g. dpkt 1.8.
+ # Have builder check patches URL.
+
+ # TODO: package ipython.
+
+ # TODO:
+ # (u'protobuf', DependencyBuilder.PROJECT_TYPE_GOOGLE_CODE_WIKI),
+ # ./configure
+ # make
+ # cd python
+ # python setup.py build
+ # python setup.py install --root $PWD/tmp
+ #
+ # Build of rpm fails:
+ # python setup.py bdist_rpm
+ #
+ # Solution: use protobuf-python.spec to build
+
+ # TODO: download and build sqlite3 from source?
+ # http://www.sqlite.org/download.html
+ # or copy sqlite3.h, .lib and .dll to src/ directory?
+
+ # TODO: rpm build of psutil is broken, fix upstream or add patching.
+ # (u'psutil', DependencyBuilder.PROJECT_TYPE_PYPI), + + builds = [] + with open(options.config_file) as file_object: + dependency_definition_reader = DependencyDefinitionReader() + for dependency_definition in dependency_definition_reader.Read(file_object): + builds.append(dependency_definition) + + if not os.path.exists(options.build_directory): + os.mkdir(options.build_directory) + + current_working_directory = os.getcwd() + os.chdir(options.build_directory) + + result = True + for dependency_definition in builds: + if not dependency_builder.Build(dependency_definition): + print u'Failed building: {0:s}'.format(dependency_definition.name) + result = False + break + + os.chdir(current_working_directory) + + return result + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/utils/check_dependencies.py b/utils/check_dependencies.py new file mode 100755 index 0000000..48847cd --- /dev/null +++ b/utils/check_dependencies.py @@ -0,0 +1,387 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Script to check for the availability and version of dependencies.""" + +import re +import urllib2 + + +def DownloadPageContent(download_url): + """Downloads the page content. + + Args: + download_url: the URL where to download the page content. + + Returns: + The page content if successful, None otherwise. + """ + if not download_url: + return + + url_object = urllib2.urlopen(download_url) + + if url_object.code != 200: + return + + return url_object.read() + + +def GetLibyalGithubReleasesLatestVersion(library_name): + """Retrieves the latest version number of a libyal library on GitHub releases. + + Args: + library_name: the name of the libyal library. + + Returns: + The latest version for a given libyal library on GitHub releases + or 0 on error. + """ + download_url = ( + u'https://github.com/libyal/{0:s}/releases').format(library_name) + + page_content = DownloadPageContent(download_url) + if not page_content: + return 0 + + # The format of the project download URL is: + # /libyal/{project name}/releases/download/{git tag}/ + # {project name}{status-}{version}.tar.gz + # Note that the status is optional and will be: beta, alpha or experimental. + expression_string = ( + u'/libyal/{0:s}/releases/download/[^/]*/{0:s}-[a-z-]*([0-9]+)' + u'[.]tar[.]gz').format(library_name) + matches = re.findall(expression_string, page_content) + + if not matches: + return 0 + + return int(max(matches)) + + +# TODO: Remove when Google Drive support is no longer needed. +def GetLibyalGoogleDriveLatestVersion(library_name): + """Retrieves the latest version number of a libyal library on Google Drive. + + Args: + library_name: the name of the libyal library. + + Returns: + The latest version for a given libyal library on Google Drive + or 0 on error. 
+ """
+ download_url = 'https://code.google.com/p/{0:s}/'.format(library_name)
+
+ page_content = DownloadPageContent(download_url)
+ if not page_content:
+ return 0
+
+ # The format of the library downloads URL is:
+ # https://googledrive.com/host/{random string}/
+ # Note: this pattern is a best-guess reconstruction of an anchor tag that
+ # links to the Google Drive hosted downloads page; verify it against the
+ # actual Google Code project page markup.
+ expression_string = (
+ '<a [^>]*href="(https://googledrive.com/host/[^"]*)"[^>]*>Downloads')
+ matches = re.findall(expression_string, page_content)
+
+ if not matches or len(matches) != 1:
+ return 0
+
+ page_content = DownloadPageContent(matches[0])
+ if not page_content:
+ return 0
+
+ # The format of the library download URL is:
+ # /host/{random string}/{library name}-{status-}{version}.tar.gz
+ # Note that the status is optional and will be: beta, alpha or experimental.
+ expression_string = '/host/[^/]*/{0:s}-[a-z-]*([0-9]+)[.]tar[.]gz'.format(
+ library_name)
+ matches = re.findall(expression_string, page_content)
+
+ if not matches:
+ return 0
+
+ return int(max(matches))
+
+
+def CheckLibyal(libyal_python_modules):
+ """Checks the availability of libyal libraries.
+
+ Args:
+ libyal_python_modules: list of libyal python module names.
+
+ Returns:
+ True if the libyal libraries are available, False otherwise.
+ """
+ connection_error = False
+ result = True
+ for module_name, module_version in libyal_python_modules:
+ try:
+ module_object = map(__import__, [module_name])[0]
+ module_loaded = True
+ except ImportError:
+ print u'[FAILURE]\tmissing: {0:s}.'.format(module_name)
+ module_loaded = False
+ result = False
+
+ if module_loaded:
+ libyal_name = u'lib{0:s}'.format(module_name[2:])
+
+ installed_version = int(module_object.get_version())
+ try:
+ latest_version = GetLibyalGithubReleasesLatestVersion(libyal_name)
+ except urllib2.URLError:
+ latest_version = 0
+
+ if not latest_version:
+ try:
+ latest_version = GetLibyalGoogleDriveLatestVersion(libyal_name)
+ except urllib2.URLError:
+ latest_version = 0
+
+ if not latest_version:
+ print (
+ u'Unable to determine latest version of {0:s} ({1:s}).\n').format(
+ libyal_name, module_name)
+ latest_version = None
+ connection_error = True
+
+ if module_version is not None and installed_version < module_version:
+ print (
+ u'[FAILURE]\t{0:s} ({1:s}) version: {2:d} is too old, {3:d} or '
+ u'later required.').format(
+ libyal_name, module_name, installed_version, module_version)
+ result = False
+
+ elif latest_version and installed_version != latest_version:
+ print (
+ u'[INFO]\t\t{0:s} ({1:s}) version: {2:d} installed, '
+ u'version: {3:d} available.').format(
+ libyal_name, module_name, installed_version, latest_version)
+
+ else:
+ print u'[OK]\t\t{0:s} ({1:s}) version: {2:d}'.format(
+ libyal_name, module_name, installed_version)
+
+ if connection_error:
+ print (
+ u'[INFO] to check for the latest versions this script needs Internet '
+ u'access.')
+
+ return result
+
+
+def CheckPythonModule(
+ module_name, version_attribute_name, minimum_version,
+ maximum_version=None):
+ """Checks the availability of a Python module.
+
+ Args:
+ module_name: the name of the module.
+ version_attribute_name: the name of the attribute that contains the module
+ version.
+ minimum_version: the minimum required version.
+ maximum_version: the maximum required version. This attribute is optional
+ and should only be used if there is a recent API change
+ that prevents the tool from running if a later version
+ is used.
+
+ Returns:
+ True if the Python module is available and conforms to the minimum required
+ version. False otherwise.
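+
+ Example (illustrative, hypothetical version strings): a plain string
+ compare is wrong since '1.10' < '1.9' is True; after the map(int, ...)
+ conversion used below, [1, 10] < [1, 9] is False as intended.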
+ """ + try: + module_object = map(__import__, [module_name])[0] + except ImportError: + print u'[FAILURE]\tmissing: {0:s}.'.format(module_name) + return False + + if version_attribute_name and minimum_version: + module_version = getattr(module_object, version_attribute_name, None) + + if not module_version: + return False + + # Split the version string and convert every digit into an integer. + # A string compare of both version strings will yield an incorrect result. + module_version_map = map(int, module_version.split('.')) + minimum_version_map = map(int, minimum_version.split('.')) + + if module_version_map < minimum_version_map: + print ( + u'[FAILURE]\t{0:s} version: {1:s} is too old, {2:s} or later ' + u'required.').format(module_name, module_version, minimum_version) + return False + + if maximum_version: + maximum_version_map = map(int, maximum_version.split('.')) + if module_version_map > maximum_version_map: + print ( + u'[FAILURE]\t{0:s} version: {1:s} is too recent, {2:s} or earlier ' + u'required.').format(module_name, module_version, maximum_version) + return False + + print u'[OK]\t\t{0:s} version: {1:s}'.format(module_name, module_version) + else: + print u'[OK]\t\t{0:s}'.format(module_name) + + return True + + +def CheckPytsk(): + """Checks the availability of pytsk3. + + Returns: + True if the pytsk3 Python module is available, false otherwise. + """ + module_name = 'pytsk3' + + try: + module_object = map(__import__, [module_name])[0] + except ImportError: + print u'[FAILURE]\tmissing: {0:s}.'.format(module_name) + return False + + minimum_version = '4.1.2' + module_version = module_object.TSK_VERSION_STR + + # Split the version string and convert every digit into an integer. + # A string compare of both version strings will yield an incorrect result. + module_version_map = map(int, module_version.split('.')) + minimum_version_map = map(int, minimum_version.split('.')) + if module_version_map < minimum_version_map: + print ( + u'[FAILURE]\tSleuthKit version: {0:s} is too old, {1:s} or later ' + u'required.').format(module_version, minimum_version) + return False + + print u'[OK]\t\tSleuthKit version: {0:s}'.format(module_version) + + minimum_version = '20140506' + if not hasattr(module_object, 'get_version'): + print u'[FAILURE]\t{0:s} is too old, {1:s} or later required.'.format( + module_name, minimum_version) + return False + + module_version = module_object.get_version() + if module_version < minimum_version: + print ( + u'[FAILURE]\t{0:s} version: {1:s} is too old, {2:s} or later ' + u'required.').format(module_name, module_version, minimum_version) + return False + + print u'[OK]\t\t{0:s} version: {1:s}'.format(module_name, module_version) + + return True + + +if __name__ == '__main__': + check_result = True + print u'Checking availability and versions of plaso dependencies.' + + # The bencode module does not appear to have no version information. + if not CheckPythonModule('bencode', '', ''): + check_result = False + + if not CheckPythonModule('binplist', '__version__', '0.1.4'): + check_result = False + + if not CheckPythonModule('construct', '__version__', '2.5.2'): + check_result = False + + if not CheckPythonModule('dateutil.parser', '', ''): + check_result = False + + if not CheckPythonModule('dfvfs', '__version__', '20141220'): + check_result = False + + if not CheckPythonModule('dpkt', '__version__', '1.8'): + check_result = False + + # The protobuf module does not appear to have version information. 
+ if not CheckPythonModule('google.protobuf', '', ''): + check_result = False + + if not CheckPythonModule('hachoir_core', '__version__', '1.3.3'): + check_result = False + + if not CheckPythonModule('hachoir_parser', '__version__', '1.3.4'): + check_result = False + + if not CheckPythonModule('hachoir_metadata', '__version__', '1.3.3'): + check_result = False + + if not CheckPythonModule('IPython', '__version__', '1.2.1'): + check_result = False + + if not CheckPythonModule('yaml', '__version__', '3.10'): + check_result = False + + if not CheckPythonModule('psutil', '__version__', '1.2.1'): + check_result = False + + if not CheckPythonModule('pyparsing', '__version__', '2.0.2'): + check_result = False + + # TODO: determine the version of pytz. + # pytz uses __version__ but has a different version indicator e.g. 2012d + if not CheckPythonModule('pytz', '', ''): + check_result = False + + if not CheckPythonModule('six', '__version__', '1.1.0'): + check_result = False + + if not CheckPythonModule('sqlite3', 'sqlite_version', '3.7.8'): + check_result = False + + if not CheckPytsk(): + check_result = False + + libyal_check_result = CheckLibyal([ + ('pybde', 20140531), + ('pyesedb', 20140301), + ('pyevt', None), + ('pyevtx', 20141112), + ('pyewf', 20131210), + ('pyfwsi', 20140714), + ('pylnk', 20141026), + ('pymsiecf', 20130317), + ('pyolecf', 20131012), + ('pyqcow', 20131204), + ('pyregf', 20130716), + ('pysmdev', 20140529), + ('pysmraw', 20140612), + ('pyvhdi', 20131210), + ('pyvmdk', 20140421), + ('pyvshadow', 20131209), + ]) + + if not check_result: + build_instructions_url = ( + u'https://sites.google.com/a/kiddaland.net/plaso/developer' + u'/building-the-tool') + + print u'See: {0:s} on how to set up plaso.'.format( + build_instructions_url) + + if not libyal_check_result: + libyal_downloads_url = ( + u'https://googledrive.com/host/0B30H7z4S52FleW5vUHBnblJfcjg' + u'/libyal.html') + + print u'Libyal libraries can be downloaded from here: {0:s}'.format( + libyal_downloads_url) + + print u'' diff --git a/utils/common.sh b/utils/common.sh new file mode 100755 index 0000000..12bcf67 --- /dev/null +++ b/utils/common.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# A small script that contains common functions for code review checks. +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +EXIT_FAILURE=1; +EXIT_SUCCESS=0; + +linter() +{ + # Examples of the output of "git status -s" + # If a file is added: + # A utils/common.sh + # If a file is modified: + # M utils/common.sh + # If a file is renamed: + # R utils/common.sh -> utils/uncommon.sh + # If a file is modified and renamed: + # RM utils/common.sh -> utils/uncommon.sh + AWK_SCRIPT="if (\$1 == \"A\" || \$1 == \"AM\" || \$1 == \"M\" || \$1 == \"MM\") { print \$2; } else if (\$1 == \"R\" || \$1 == \"RM\") { print \$4; }"; + + # First find all files that need linter + FILES=`git status -s | grep -v "^?" 
| awk "{ ${AWK_SCRIPT} }" | grep "\.py$"`; + + PYLINT_VERSION=`pylint --version 2> /dev/null | grep 'pylint' | sed 's/^pylint \(.*\),/\1/'`; + + RESULT=`echo -e "${PYLINT_VERSION}\n1.1.0" | sort -V | head -n1`; + + if test "${RESULT}" = "${PYLINT_VERSION}"; + then + PYLINTRC="utils/pylintrc"; + else + PYLINTRC="utils/pylintrc-1.1.0"; + fi + LINTER="pylint --rcfile=${PYLINTRC}"; + + echo "Run through pylint."; + + for FILE in ${FILES}; + do + if test "${FILE}" = "setup.py" || test "${FILE}" = "utils/upload.py" ; + then + echo " -- Skipping: ${FILE} --" + continue + fi + + if test `echo ${FILE} | tail -c8` == "_pb2.py" ; + then + echo "Skipping compiled protobufs: ${FILE}" + continue + fi + + echo " -- Checking: ${FILE} --" + $LINTER "${FILE}" + + if test $? -ne 0 ; + then + echo "Fix linter errors before proceeding." + return ${EXIT_FAILURE}; + fi + done + + if test $? -ne 0 ; + then + return ${EXIT_FAILURE}; + fi + + echo "Linter clear."; + + return ${EXIT_SUCCESS}; +} diff --git a/utils/compile_proto.sh b/utils/compile_proto.sh new file mode 100755 index 0000000..38cbda7 --- /dev/null +++ b/utils/compile_proto.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# A small helper script to compile protobufs. +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +compile() +{ + protoc -I=. --python_out=. plaso/proto/$1 +} + +compile plaso_storage.proto diff --git a/utils/create_authors.py b/utils/create_authors.py new file mode 100644 index 0000000..4d80224 --- /dev/null +++ b/utils/create_authors.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This file simply creates the AUTHOR file based on parser content.""" + +import os +import fnmatch + + +def ProcessFile(file_path): + """Process a single file to match for an author tag.""" + # TODO: Change to do a "proper" import of modules and + # check the __author__ attribute of it. + # Current approach does not work if the author tag is a list + # instead of a single attribute (current files as of writing do + # not have that behavior, but that might change in the future). 
+  ret = ''
+  with open(file_path, 'rb') as fh:
+    for line in fh:
+      if '__author__' in line:
+        _, _, ret = line[:-1].partition(' = ')
+
+  return ret[1:-1]
+
+
+if __name__ == '__main__':
+  header = """# Names should be added to this file with this pattern:
+#
+# For individuals:
+#   Name (email address)
+#
+# For organizations:
+#   Organization (fnmatch pattern)
+#
+# See python fnmatch module documentation for more information.
+
+Google Inc. (*@google.com)
+Kristinn Gudjonsson (kiddi@kiddaland.net)
+Joachim Metz (joachim.metz@gmail.com)
+Eric Mak (ericmak@gmail.com)
+Elizabeth Schweinsberg (beth@bethlogic.net)
+Keith Wall (kwallster@gmail.com)
+"""
+  authors = []
+
+  with open('AUTHORS', 'wb') as out_file:
+    out_file.write(header)
+
+    for path, folders, files in os.walk('.'):
+      # os.walk('.') yields paths prefixed with the starting directory,
+      # e.g. './utils', so compare against the prefixed form.
+      if path.startswith(('./utils', './tools', './build')):
+        continue
+      for filematch in fnmatch.filter(files, '*.py'):
+        author = ProcessFile(os.path.join(path, filematch))
+        if not author:
+          continue
+        if type(author) in (list, tuple):
+          for author_name in author:
+            # Append the individual author name, not the whole list.
+            if author_name not in authors:
+              authors.append(author_name)
+        else:
+          if author not in authors:
+            authors.append(author)
+
+    out_file.write('\n'.join(authors))
+    out_file.write('\n')
+
+  print 'Added {0:d} authors from files.'.format(len(authors))
diff --git a/utils/dependencies.ini b/utils/dependencies.ini
new file mode 100644
index 0000000..c2a6bc2
--- /dev/null
+++ b/utils/dependencies.ini
@@ -0,0 +1,164 @@
+[bencode]
+homepage_url: http://bittorent.com/
+download_url: https://pypi.python.org/pypi/bencode
+maintainer: Thomas Rampelberg
+description_short: The BitTorrent bencode module as a light-weight, standalone
+  package
+description_long: The BitTorrent bencode module as a light-weight, standalone
+  package
+
+[binplist]
+minimum_version: 0.1.4
+homepage_url: https://code.google.com/p/binplist/
+download_url: https://code.google.com/p/binplist/downloads/list
+
+[construct]
+minimum_version: 2.5.2
+homepage_url: http://construct.readthedocs.org/en/latest/
+download_url: https://pypi.python.org/pypi/construct
+maintainer: Tomer Filiba
+description_short: Construct is a powerful declarative parser (and builder)
+  for binary data
+description_long: Construct is a powerful declarative parser (and builder)
+  for binary data
+
+[dfvfs]
+minimum_version: 20140824
+download_url: https://github.com/log2timeline/dfvfs
+
+[dpkt]
+minimum_version: 1.8
+maintainer: Dug Song
+homepage_url: https://code.google.com/p/dpkt/
+download_url: https://code.google.com/p/dpkt/downloads/list
+description_short: Python packet creation / parsing module
+description_long: Python module for fast, simple packet creation / parsing,
+  with definitions for the basic TCP/IP protocols.
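+# A minimal section needs only the project name; the version and URL keys
+# are optional, e.g. (hypothetical entry, for illustration only):
+#
+# [example-project]
+# minimum_version: 1.0.0
+# download_url: https://example.com/example-project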
+ +[ipython] +minimum_version: 1.2.1 + +[libbde] +minimum_version: 20140531 +download_url: https://github.com/libyal/libbde + +[libesedb] +minimum_version: 20140301 +download_url: https://github.com/libyal/libesedb + +[libevt] +minimum_version: 20141026 +download_url: https://github.com/libyal/libevt + +[libevtx] +minimum_version: 20141112 +download_url: https://github.com/libyal/libevtx + +[libewf] +minimum_version: 20131210 +download_url: https://googledrive.com/host/0B3fBvzttpiiSMTdoaVExWWNsRjg/ + +[libfwsi] +minimum_version: 20140714 +download_url: https://github.com/libyal/libfwsi + +[liblnk] +minimum_version: 20141026 +download_url: https://github.com/libyal/liblnk + +[libmsiecf] +minimum_version: 20130317 +download_url: https://github.com/libyal/libmsiecf + +[libolecf] +minimum_version: 20131012 +download_url: https://github.com/libyal/libolecf + +[libqcow] +minimum_version: 20131204 +download_url: https://github.com/libyal/libqcow + +[libregf] +minimum_version: 20130716 +download_url: https://github.com/libyal/libregf + +[libsmdev] +minimum_version: 20140529 +download_url: https://github.com/libyal/libsmdev + +[libsmraw] +minimum_version: 20140612 +download_url: https://github.com/libyal/libsmraw + +[libvhdi] +minimum_version: 20131210 +download_url: https://github.com/libyal/libvhdi + +[libvmdk] +minimum_version: 20140421 +download_url: https://github.com/libyal/libvmdk + +[libvshadow] +minimum_version: 20131209 +download_url: https://github.com/libyal/libvshadow + +[psutil] +minimum_version: 1.2.1 + +[pyparsing] +minimum_version: 2.0.2 +maintainer: Paul McGuire +homepage_url: http://pyparsing.wikispaces.com/ +download_url: http://sourceforge.net/projects/pyparsing/files/ +description_short: +description_long: The parsing module is an alternative approach to creating + and executing simple grammars, vs. the traditional lex/yacc approach, + or the use of regular expressions. The parsing module provides a library + of classes that client code uses to construct the grammar directly + in Python code. + +[pysqlite] +minimum_version: 3.7.8 +maintainer: Gerhard Häring +homepage_url: https://github.com/ghaering/pysqlite +download_url: https://pypi.python.org/pypi/pysqlite/ +description_short: +description_long: pysqlite is a DB-API 2.0-compliant database interface + for SQLite. + +[pytz] +dpkg_dependencies: tzdata +dpkg_name: tz +maintainer: Stuart Bishop +homepage_url: http://pythonhosted.org/pytz/ +download_url: http://pypi.python.org/pypi/pytz/ +description_short: +description_long: python-tz brings the Olson tz database into Python. + This library allows accurate and cross platform timezone calculations + using Python 2.3 or higher. It also solves the issue of ambiguous times + at the end of daylight savings, which you can read more about in + the Python Library Reference (datetime.tzinfo). + +[PyYAML] +minimum_version: 3.10 +dpkg_name: yaml +maintainer: Kirill Simonov +homepage_url: http://pyyaml.org/ +download_url: https://pypi.python.org/pypi/PyYAML +description_short: +description_long: Python-yaml is a complete YAML 1.1 parser and emitter + for Python. It can parse all examples from the specification. The parsing + algorithm is simple enough to be a reference for YAML parser implementors. + A simple extension API is also provided. The package is built using libyaml + for improved speed. 
+
+[six]
+minimum_version: 1.1.0
+maintainer: Benjamin Peterson
+homepage_url: http://pypi.python.org/pypi/six/
+download_url: http://pypi.python.org/pypi/six/
+description_short: Python 2 and 3 compatibility library (Python 2 interface)
+description_long: Six is a Python 2 and 3 compatibility library. It provides
+  utility functions for smoothing over the differences between the Python
+  versions with the goal of writing Python code that is compatible on both
+  Python versions.
diff --git a/utils/download_patch_set.py b/utils/download_patch_set.py
new file mode 100644
index 0000000..001ebb6
--- /dev/null
+++ b/utils/download_patch_set.py
@@ -0,0 +1,115 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2013 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This file contains a simple utility to fetch content of code reviews."""
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import urllib2
+
+
+def DownloadPatchSet(cl_number):
+  """Returns the name of the patch file for a given CL.
+
+  Args:
+    cl_number: The CL number for the code review.
+
+  Returns:
+    The name of the patch file, or None if unable to download
+    the patch.
+  """
+  try:
+    test_cl = int(cl_number)
+    if cl_number != str(test_cl):
+      return
+  except ValueError:
+    return
+
+  url = 'https://codereview.appspot.com/api/{0}/'.format(cl_number)
+  url_object = urllib2.urlopen(url)
+
+  if url_object.code != 200:
+    return
+
+  data = url_object.read()
+
+  try:
+    data_obj = json.loads(data)
+  except ValueError:
+    return
+
+  patches = data_obj.get('patchsets', [])
+  # Without any patch sets there is nothing to download.
+  if not patches:
+    return
+  last_patch = patches.pop()
+
+  patch_url = 'https://codereview.appspot.com/download/issue{}_{}.diff'.format(
+      cl_number, last_patch)
+
+  patch_object = urllib2.urlopen(patch_url)
+  if patch_object.code != 200:
+    return
+
+  patch_data = patch_object.read()
+  patch_file_name = ''
+  with tempfile.NamedTemporaryFile(delete=False) as patch_file_object:
+    patch_file_object.write(patch_data)
+    patch_file_name = patch_file_object.name
+
+  return patch_file_name
+
+
+if __name__ == '__main__':
+  if len(sys.argv) != 2:
+    print 'Need to provide a CL number.'
+    sys.exit(1)
+
+  code_review_number = sys.argv[1]
+  patch_file = DownloadPatchSet(code_review_number)
+
+  if not patch_file:
+    print 'Unable to download a patch set, exiting.'
+    sys.exit(1)
+
+  branch_name = 'review_{}'.format(code_review_number)
+  branch_exit = os.system('git checkout -b {}'.format(branch_name))
+  if branch_exit:
+    print 'Unable to create a new branch, exiting.'
+    sys.exit(1)
+
+  patch_exit = os.system('patch -p1 < {}'.format(patch_file))
+  if patch_exit:
+    print 'Unable to patch files.'
+    sys.exit(1)
+
+  git_add = subprocess.Popen(
+      'git status -s', shell=True, stdout=subprocess.PIPE)
+  git_to_add = []
+  for git_line in git_add.stdout:
+    if git_line.startswith('??'):
+      git_to_add.append(git_line[3:-1])
+
+  os.system('git add -A')
+  print 'Files added to git branch'
+  os.system('git commit -a -m "Committing CL to branch"')
+
+  os.remove(patch_file)
+
+  print 'Patch downloaded and applied, branch {} created.'.format(
+      branch_name)
+  print 'Remember to delete branch when done testing/inspecting.'
+  print 'git checkout master && git branch -D {}'.format(branch_name)
+
diff --git a/utils/doxygen.conf b/utils/doxygen.conf
new file mode 100644
index 0000000..e1caf6e
--- /dev/null
+++ b/utils/doxygen.conf
@@ -0,0 +1,1781 @@
+# Doxyfile 1.7.6.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME = "Plaso - SuperTimeline at its best."
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = 1.0.2
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give the
+# viewer a quick idea about the purpose of the project. Keep the description
+# short.
+
+PROJECT_BRIEF = "Generate and analyse a super timeline."
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO = "config/logo.jpg"
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = "doc"
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = YES + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. 
For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = YES + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. 
using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +SYMBOL_CACHE_SIZE = 0 + +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appear multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. 
+ +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. 
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files
+# containing the references data. This must be a list of .bib files. The
+# .bib extension is automatically appended if omitted. Using this command
+# requires the bibtex tool to be installed. See also
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
+# feature you need bibtex and perl available in the search path.
+
+CITE_BIB_FILES =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+ +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = "plaso" + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = "*.py" + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. 
+
+EXCLUDE_SYMLINKS = YES
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS = "*_test.py"
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER = "python /usr/bin/doxypy"
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = YES
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+ +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 8 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. 
+ +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# style sheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. 
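+#
+# For example, the value of 80 used below corresponds to a gamma of
+# 80 / 100 = 0.8, i.e. slightly lighter output than the neutral value of 100.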
+ +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). 
+ +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = YES + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to NO if you already set +# GENERATE_TREEVIEW to YES. 
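+#
+# Note: this configuration enables the navigation tree below
+# (GENERATE_TREEVIEW = YES), so the redundant tab index is disabled here.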
+
+DISABLE_INDEX = YES
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+# Since the tree basically has the same information as the tab index you
+# could consider setting DISABLE_INDEX to YES when enabling this option.
+
+GENERATE_TREEVIEW = YES
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE = 4
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes take effect.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want the formulas to look prettier in the
+# HTML output. When enabled you also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the
+# mathjax.org site, so you can quickly see the result without installing
+# MathJax, but it is strongly recommended to install a local copy of MathJax
+# before deployment.
+
+MATHJAX_RELPATH = http://www.mathjax.org/mathjax
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering.
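+#
+# For example, to enable the AMS math and symbol extensions one would write
+# (extension names as defined by MathJax):
+#
+#   MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols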
+ +MATHJAX_EXTENSIONS = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! 
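+#
+# A default header and footer to start from can be generated by doxygen
+# itself:
+#
+#   doxygen -w latex header.tex footer.tex doxygen.sty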
+ +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. 
+ +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = NO + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. 
+ +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. 
If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. 
+
+DOT_CLEANUP = YES
diff --git a/utils/example_vimrc b/utils/example_vimrc
new file mode 100644
index 0000000..65b5a1f
--- /dev/null
+++ b/utils/example_vimrc
@@ -0,0 +1,51 @@
+set nocompatible
+set backspace=2
+
+syntax on
+" set both tabstop and shiftwidth to 2 spaces
+set sw=2
+set ts=2 ai et list
+
+set showmatch "matching brackets
+if has("autocmd")
+  au BufReadPost * if line("'\"") > 1 && line("'\"") <= line("$") | exe "normal! g'\"" | endif
+endif
+
+" Indent Python in the Google way.
+setlocal indentexpr=GetGooglePythonIndent(v:lnum)
+
+let s:maxoff = 50 " maximum number of lines to look backwards.
+
+function GetGooglePythonIndent(lnum)
+
+  " Indent inside parens.
+  " Align with the open paren unless it is at the end of the line.
+  " E.g.
+  "   open_paren_not_at_EOL(100,
+  "                         (200,
+  "                          300),
+  "                         400)
+  "   open_paren_at_EOL(
+  "       100, 200, 300, 400)
+  call cursor(a:lnum, 1)
+  let [par_line, par_col] = searchpairpos('(\|{\|\[', '', ')\|}\|\]', 'bW',
+        \ "line('.') < " . (a:lnum - s:maxoff) . " ? dummy :"
+        \ . " synIDattr(synID(line('.'), col('.'), 1), 'name')"
+        \ . " =~ '\\(Comment\\|String\\)$'")
+  if par_line > 0
+    call cursor(par_line, 1)
+    if par_col != col("$") - 1
+      return par_col
+    endif
+  endif
+
+  " Delegate the rest to the original function.
+  return GetPythonIndent(a:lnum)
+
+endfunction
+
+let pyindent_nested_paren="&sw*2"
+let pyindent_open_paren="&sw*2"
+
+set textwidth=80
+set ruler
diff --git a/utils/git-cl b/utils/git-cl
new file mode 100755
index 0000000..b7e431c
--- /dev/null
+++ b/utils/git-cl
@@ -0,0 +1,871 @@
+#!/usr/bin/python
+# git-cl -- a git-command for integrating reviews on Rietveld
+# Copyright (C) 2008 Evan Martin <martine@danga.com>
+
+import getpass
+import optparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import textwrap
+import upload
+import urllib2
+
+# readline is optional; importing it (when available) gives nicer line
+# editing in the interactive prompts below.
+try:
+  import readline
+except ImportError:
+  pass
+
+DEFAULT_SERVER = 'codereview.appspot.com'
+PREDCOMMIT_HOOK = '.git/hooks/pre-cl-dcommit'
+PREUPLOAD_HOOK = '.git/hooks/pre-cl-upload'
+
+def DieWithError(message):
+  print >>sys.stderr, message
+  sys.exit(1)
+
+
+def RunCommand(cmd, error_ok=False, error_message=None, exit_code=False,
+               redirect_stdout=True):
+  # Useful for debugging:
+  # print >>sys.stderr, ' '.join(cmd)
+  if redirect_stdout:
+    stdout = subprocess.PIPE
+  else:
+    stdout = None
+  proc = subprocess.Popen(cmd, stdout=stdout)
+  output = proc.communicate()[0]
+  if exit_code:
+    return proc.returncode
+  if not error_ok and proc.returncode != 0:
+    DieWithError('Command "%s" failed.\n' % (' '.join(cmd)) +
+                 (error_message or output))
+  return output
+
+
+def RunGit(args, **kwargs):
+  cmd = ['git'] + args
+  return RunCommand(cmd, **kwargs)
+
+
+class Settings:
+  def __init__(self):
+    self.server = None
+    self.cc = None
+    self.root = None
+    self.is_git_svn = None
+    self.svn_branch = None
+    self.tree_status_url = None
+    self.viewvc_url = None
+
+  def GetServer(self, error_ok=False):
+    if not self.server:
+      if not error_ok:
+        error_message = ('You must configure your review setup by running '
+                         '"git cl config".')
+        self.server = self._GetConfig('rietveld.server',
+                                      error_message=error_message)
+      else:
+        self.server = self._GetConfig('rietveld.server', error_ok=True)
+    return self.server
+
+  def GetCCList(self):
+    if self.cc is None:
+      self.cc = self._GetConfig('rietveld.cc', error_ok=True)
+    return self.cc
+
+  def GetRoot(self):
+    if not self.root:
+      self.root = os.path.abspath(RunGit(['rev-parse', '--show-cdup']).strip())
+    return self.root
+
+  def GetIsGitSvn(self):
+    """Return true if this repo looks like it's using git-svn."""
+    if self.is_git_svn is None:
+      # If you have any "svn-remote.*" config keys, we think you're using svn.
+      self.is_git_svn = RunGit(['config', '--get-regexp', r'^svn-remote\.'],
+                               exit_code=True) == 0
+    return self.is_git_svn
+
+  def GetSVNBranch(self):
+    if self.svn_branch is None:
+      if not self.GetIsGitSvn():
+        raise Exception("Repo doesn't appear to be a git-svn repo.")
+
+      # Try to figure out which remote branch we're based on.
+      # Strategy:
+      # 1) find all git-svn branches and note their svn URLs.
+      # 2) iterate through our branch history and match up the URLs.
+
+      # regexp matching the git-svn line that contains the URL.
+      git_svn_re = re.compile(r'^\s*git-svn-id: (\S+)@', re.MULTILINE)
+
+      # Get the refname and svn url for all refs/remotes/*.
+      remotes = RunGit(['for-each-ref', '--format=%(refname)',
+                        'refs/remotes']).splitlines()
+      svn_refs = {}
+      for ref in remotes:
+        match = git_svn_re.search(RunGit(['cat-file', '-p', ref]))
+        if match:
+          svn_refs[match.group(1)] = ref
+
+      if len(svn_refs) == 1:
+        # Only one svn branch exists -- seems like a good candidate.
+        self.svn_branch = svn_refs.values()[0]
+      elif len(svn_refs) > 1:
+        # We have more than one remote branch available. We don't
+        # want to go through all of history, so read a line from the
+        # pipe at a time.
+        # The -100 is an arbitrary limit so we don't search forever.
+        cmd = ['git', 'log', '-100', '--pretty=medium']
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+        for line in proc.stdout:
+          match = git_svn_re.match(line)
+          if match:
+            url = match.group(1)
+            if url in svn_refs:
+              self.svn_branch = svn_refs[url]
+              proc.stdout.close()  # Cut pipe.
+              break
+
+      if not self.svn_branch:
+        raise Exception("Can't guess svn branch -- try specifying it on the "
+                        "command line")
+
+    return self.svn_branch
+
+  def GetTreeStatusUrl(self, error_ok=False):
+    if not self.tree_status_url:
+      error_message = ('You must configure your tree status URL by running '
+                       '"git cl config".')
+      self.tree_status_url = self._GetConfig('rietveld.tree-status-url',
+                                             error_ok=error_ok,
+                                             error_message=error_message)
+    return self.tree_status_url
+
+  def GetViewVCUrl(self):
+    if not self.viewvc_url:
+      self.viewvc_url = self._GetConfig('rietveld.viewvc-url', error_ok=True)
+    return self.viewvc_url
+
+  def _GetConfig(self, param, **kwargs):
+    return RunGit(['config', param], **kwargs).strip()
+
+
+settings = Settings()
+
+
+did_migrate_check = False
+def CheckForMigration():
+  """Migrate from the old issue format, if found.
+
+  We used to store the branch<->issue mapping in a file in .git, but it's
+  better to store it in the .git/config, since deleting a branch deletes that
+  branch's entry there.
+  """
+
+  # Don't run more than once.
+  global did_migrate_check
+  if did_migrate_check:
+    return
+
+  gitdir = RunGit(['rev-parse', '--git-dir']).strip()
+  storepath = os.path.join(gitdir, 'cl-mapping')
+  if os.path.exists(storepath):
+    print "old-style git-cl mapping file (%s) found; migrating." % storepath
+    store = open(storepath, 'r')
+    for line in store:
+      branch, issue = line.strip().split()
+      RunGit(['config', 'branch.%s.rietveldissue' % ShortBranchName(branch),
+              issue])
+    store.close()
+    os.remove(storepath)
+  did_migrate_check = True
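+
+# For reference, after migration the branch<->issue mapping lives in plain
+# git config keys, one per branch; e.g. (branch name and issue number are
+# hypothetical):
+#
+#   git config branch.myfeature.rietveldissue 12345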
+
+
+def IssueURL(issue):
+  """Get the URL for a particular issue."""
+  return 'http://%s/%s' % (settings.GetServer(), issue)
+
+
+def ShortBranchName(branch):
+  """Convert a name like 'refs/heads/foo' to just 'foo'."""
+  return branch.replace('refs/heads/', '')
+
+
+class Changelist:
+  def __init__(self, branchref=None):
+    # Poke settings so we get the "configure your server" message if necessary.
+    settings.GetServer()
+    self.branchref = branchref
+    if self.branchref:
+      self.branch = ShortBranchName(self.branchref)
+    else:
+      self.branch = None
+    self.upstream_branch = None
+    self.has_issue = False
+    self.issue = None
+    self.has_description = False
+    self.description = None
+    self.has_patchset = False
+    self.patchset = None
+
+  def GetBranch(self):
+    """Returns the short branch name, e.g. 'master'."""
+    if not self.branch:
+      self.branchref = RunGit(['symbolic-ref', 'HEAD']).strip()
+      self.branch = ShortBranchName(self.branchref)
+    return self.branch
+
+  def GetBranchRef(self):
+    """Returns the full branch name, e.g. 'refs/heads/master'."""
+    self.GetBranch()  # Poke the lazy loader.
+    return self.branchref
+
+  def GetUpstreamBranch(self):
+    if self.upstream_branch is None:
+      branch = self.GetBranch()
+      upstream_branch = RunGit(['config', 'branch.%s.merge' % branch],
+                               error_ok=True).strip()
+      if upstream_branch:
+        remote = RunGit(['config', 'branch.%s.remote' % branch]).strip()
+        # We have remote=origin and branch=refs/heads/foobar; convert to
+        # refs/remotes/origin/foobar.
+        self.upstream_branch = upstream_branch.replace('heads',
+                                                       'remotes/' + remote)
+
+      if not self.upstream_branch:
+        # Fall back on trying a git-svn upstream branch.
+        if settings.GetIsGitSvn():
+          self.upstream_branch = settings.GetSVNBranch()
+
+      if not self.upstream_branch:
+        DieWithError("""Unable to determine default branch to diff against.
+Either pass complete "git diff"-style arguments, like
+  git cl upload origin/master
+or verify this branch is set up to track another (via the --track argument to
+"git checkout -b ...").""")
+
+    return self.upstream_branch
+
+  def GetIssue(self):
+    if not self.has_issue:
+      CheckForMigration()
+      issue = RunGit(['config', self._IssueSetting()], error_ok=True).strip()
+      if issue:
+        self.issue = issue
+      else:
+        self.issue = None
+      self.has_issue = True
+    return self.issue
+
+  def GetIssueURL(self):
+    return IssueURL(self.GetIssue())
+
+  def GetDescription(self, pretty=False):
+    if not self.has_description:
+      if self.GetIssue():
+        url = self.GetIssueURL() + '/description'
+        self.description = urllib2.urlopen(url).read().strip()
+      self.has_description = True
+    if pretty:
+      wrapper = textwrap.TextWrapper()
+      wrapper.initial_indent = wrapper.subsequent_indent = '  '
+      return wrapper.fill(self.description)
+    return self.description
+
+  def GetPatchset(self):
+    if not self.has_patchset:
+      patchset = RunGit(['config', self._PatchsetSetting()],
+                        error_ok=True).strip()
+      if patchset:
+        self.patchset = patchset
+      else:
+        self.patchset = None
+      self.has_patchset = True
+    return self.patchset
+
+  def SetPatchset(self, patchset):
+    """Set this branch's patchset.
If patchset=0, clears the patchset.""" + if patchset: + RunGit(['config', self._PatchsetSetting(), str(patchset)]) + else: + RunGit(['config', '--unset', self._PatchsetSetting()]) + self.has_patchset = False + + def SetIssue(self, issue): + """Set this branch's issue. If issue=0, clears the issue.""" + if issue: + RunGit(['config', self._IssueSetting(), str(issue)]) + else: + RunGit(['config', '--unset', self._IssueSetting()]) + self.SetPatchset(0) + self.has_issue = False + + def CloseIssue(self): + def GetUserCredentials(): + email = raw_input('Email: ').strip() + password = getpass.getpass('Password for %s: ' % email) + return email, password + + rpc_server = upload.HttpRpcServer(settings.GetServer(), + GetUserCredentials, + host_override=settings.GetServer(), + save_cookies=True) + # You cannot close an issue with a GET. + # We pass an empty string for the data so it is a POST rather than a GET. + data = [("description", self.description),] + ctype, body = upload.EncodeMultipartFormData(data, []) + rpc_server.Send('/' + self.GetIssue() + '/close', body, ctype) + + def _IssueSetting(self): + """Return the git setting that stores this change's issue.""" + return 'branch.%s.rietveldissue' % self.GetBranch() + + def _PatchsetSetting(self): + """Return the git setting that stores this change's most recent patchset.""" + return 'branch.%s.rietveldpatchset' % self.GetBranch() + + +def GetCodereviewSettingsInteractively(): + """Prompt the user for settings.""" + server = settings.GetServer(error_ok=True) + prompt = 'Rietveld server (host[:port])' + prompt += ' [%s]' % (server or DEFAULT_SERVER) + newserver = raw_input(prompt + ': ') + if not server and not newserver: + newserver = DEFAULT_SERVER + if newserver and newserver != server: + RunGit(['config', 'rietveld.server', newserver]) + + def SetProperty(initial, caption, name): + prompt = caption + if initial: + prompt += ' ("x" to clear) [%s]' % initial + new_val = raw_input(prompt + ': ') + if new_val == 'x': + RunGit(['config', '--unset-all', 'rietveld.' + name], error_ok=True) + elif new_val and new_val != initial: + RunGit(['config', 'rietveld.' + name, new_val]) + + SetProperty(settings.GetCCList(), 'CC list', 'cc') + SetProperty(settings.GetTreeStatusUrl(error_ok=True), 'Tree status URL', + 'tree-status-url') + SetProperty(settings.GetViewVCUrl(), 'ViewVC URL', 'viewvc-url') + + # TODO: configure a default branch to diff against, rather than this + # svn-based hackery. + + +def LoadCodereviewSettingsFromFile(file): + """Parse a codereview.settings file.""" + settings = {} + for line in file.read().splitlines(): + if not line or line.startswith("#"): + continue + k, v = line.split(": ", 1) + settings[k] = v + + def GetProperty(name): + return settings.get(name) + + def SetProperty(name, setting, unset_error_ok=False): + fullname = 'rietveld.' + name + if setting in settings: + RunGit(['config', fullname, settings[setting]]) + else: + RunGit(['config', '--unset-all', fullname], error_ok=unset_error_ok) + + SetProperty('server', 'CODE_REVIEW_SERVER') + # Only server setting is required. Other settings can be absent. + # In that case, we ignore errors raised during option deletion attempt. 
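+  # For reference, a minimal codereview.settings file could look like this
+  # (all values are hypothetical):
+  #
+  #   CODE_REVIEW_SERVER: codereview.appspot.com
+  #   CC_LIST: reviews@example.com
+  #   VIEW_VC: http://example.com/viewvc/
+  #   GITCL_PREUPLOAD: http://example.com/hooks/pre-cl-upload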
+ SetProperty('cc', 'CC_LIST', unset_error_ok=True) + SetProperty('tree-status-url', 'STATUS', unset_error_ok=True) + SetProperty('viewvc-url', 'VIEW_VC', unset_error_ok=True) + hooks = {} + if GetProperty('GITCL_PREUPLOAD'): + hooks['preupload'] = GetProperty('GITCL_PREUPLOAD') + if GetProperty('GITCL_PREDCOMMIT'): + hooks['predcommit'] = GetProperty('GITCL_PREDCOMMIT') + return hooks + + +def CmdConfig(args): + def DownloadToFile(url, filename): + filename = os.path.join(settings.GetRoot(), filename) + if os.path.exists(filename): + print '%s exists, skipping' % filename + return False + contents = urllib2.urlopen(url).read() + file = open(filename, 'w') + file.write(contents) + file.close() + os.chmod(filename, 0755) + return True + + parser = optparse.OptionParser( + usage='git cl config [repo root containing codereview.settings]') + (options, args) = parser.parse_args(args) + if len(args) == 0: + GetCodereviewSettingsInteractively() + return + + url = args[0] + if not url.endswith('codereview.settings'): + url = os.path.join(url, 'codereview.settings') + + # Load Codereview settings and download hooks (if available). + hooks = LoadCodereviewSettingsFromFile(urllib2.urlopen(url)) + for key, filename in (('predcommit', PREDCOMMIT_HOOK), + ('preupload', PREUPLOAD_HOOK)): + if key in hooks: + DownloadToFile(hooks[key], filename) + + +def CmdStatus(args): + parser = optparse.OptionParser(usage='git cl status [options]') + parser.add_option('--field', help='print only specific field (desc|id|url)') + (options, args) = parser.parse_args(args) + + # TODO: maybe make show_branches a flag if necessary. + show_branches = not options.field + + if show_branches: + branches = RunGit(['for-each-ref', '--format=%(refname)', 'refs/heads']) + if branches: + print 'Branches associated with reviews:' + for branch in sorted(branches.splitlines()): + cl = Changelist(branchref=branch) + print " %10s: %s" % (cl.GetBranch(), cl.GetIssue()) + + cl = Changelist() + if options.field: + if options.field.startswith('desc'): + print cl.GetDescription() + elif options.field == 'id': + print cl.GetIssue() + elif options.field == 'url': + print cl.GetIssueURL() + else: + print + print 'Current branch:', + if not cl.GetIssue(): + print 'no issue assigned.' + return 0 + print cl.GetBranch() + print 'Issue number:', cl.GetIssue(), '(%s)' % cl.GetIssueURL() + print 'Issue description:' + print cl.GetDescription(pretty=True) + + +def CmdIssue(args): + parser = optparse.OptionParser(usage='git cl issue [issue_number]') + parser.description = ('Set or display the current code review issue. ' + + 'Pass issue number 0 to clear the current issue.') + (options, args) = parser.parse_args(args) + + cl = Changelist() + if len(args) > 0: + cl.SetIssue(int(args[0])) + print 'Issue number:', cl.GetIssue(), '(%s)' % cl.GetIssueURL() + + +def UserEditedLog(starting_text): + """Given some starting text, let the user edit it and return the result.""" + editor = os.getenv('EDITOR', 'vi') + + (file_handle, filename) = tempfile.mkstemp() + file = os.fdopen(file_handle, 'w') + file.write(starting_text) + file.close() + + ret = subprocess.call(editor + ' ' + filename, shell=True) + if ret != 0: + os.remove(filename) + return + + file = open(filename) + text = file.read() + file.close() + os.remove(filename) + stripcomment_re = re.compile(r'^#.*$', re.MULTILINE) + return stripcomment_re.sub('', text).strip() + + +def RunHook(hook, upstream_branch='origin', error_ok=False): + """Run a given hook if it exists. 
By default, we fail on errors.""" + hook = '%s/%s' % (settings.GetRoot(), hook) + if not os.path.exists(hook): + return + output = RunCommand([hook, upstream_branch], error_ok).strip() + if output != '': + print output + + +def CmdPresubmit(args): + """Reports what presubmit checks on the change would report.""" + parser = optparse.OptionParser( + usage='git cl presubmit [options]') + (options, args) = parser.parse_args(args) + + if RunGit(['diff-index', 'HEAD']): + print 'Cannot presubmit with a dirty tree. You must commit locally first.' + return 1 + + print '*** Presubmit checks for UPLOAD would report: ***' + RunHook(PREUPLOAD_HOOK, error_ok=True) + + print '*** Presubmit checks for DCOMMIT would report: ***' + RunHook(PREDCOMMIT_HOOK, error_ok=True) + + +def CmdUpload(args): + parser = optparse.OptionParser( + usage='git cl upload [options] [args to "git diff"]') + parser.add_option('--bypass-hooks', action='store_true', dest='bypass_hooks', + help='bypass upload presubmit hook') + parser.add_option('-m', dest='message', help='message for patch') + parser.add_option('-r', '--reviewers', + help='reviewer email addresses') + parser.add_option('--send-mail', action='store_true', + help='send email to reviewer immediately') + (options, args) = parser.parse_args(args) + + if RunGit(['diff-index', 'HEAD']): + print 'Cannot upload with a dirty tree. You must commit locally first.' + return 1 + + cl = Changelist() + if args: + base_branch = args[0] + else: + # Default to diffing against the "upstream" branch. + base_branch = cl.GetUpstreamBranch() + args = [base_branch + "..."] + + if not options.bypass_hooks: + RunHook(PREUPLOAD_HOOK, upstream_branch=base_branch, error_ok=False) + + # --no-ext-diff is broken in some versions of Git, so try to work around + # this by overriding the environment (but there is still a problem if the + # git config key "diff.external" is used). + env = os.environ.copy() + if 'GIT_EXTERNAL_DIFF' in env: del env['GIT_EXTERNAL_DIFF'] + subprocess.call(['git', 'diff', '--no-ext-diff', '--stat', '-M'] + args, + env=env) + + upload_args = ['--assume_yes'] # Don't ask about untracked files. + upload_args.extend(['--server', settings.GetServer()]) + if options.reviewers: + upload_args.extend(['--reviewers', options.reviewers]) + upload_args.extend(['--cc', settings.GetCCList()]) + if options.message: + upload_args.extend(['--message', options.message]) + if options.send_mail: + if not options.reviewers: + DieWithError("Must specify reviewers to send email.") + upload_args.append('--send_mail') + if cl.GetIssue(): + upload_args.extend(['--issue', cl.GetIssue()]) + print ("This branch is associated with issue %s. " + "Adding patch to that issue." % cl.GetIssue()) + else: + # Construct a description for this change from the log. + # We need to convert diff options to log options. + log_args = [] + if len(args) == 1 and not args[0].endswith('.'): + log_args = [args[0] + '..'] + elif len(args) == 2: + log_args = [args[0] + '..' + args[1]] + else: + log_args = args[:] # Hope for the best! + desc = RunGit(['log', '--pretty=format:%s\n\n%b'] + log_args) + initial_text = """# Enter a description of the change. +# This will displayed on the codereview site. +# The first line will also be used as the subject of the review.""" + desc = UserEditedLog(initial_text + '\n' + desc) + if not desc: + print "Description empty; aborting." 
+ return 1 + subject = desc.splitlines()[0] + upload_args.extend(['--message', subject]) + upload_args.extend(['--description', desc]) + issue, patchset = upload.RealMain(['upload'] + upload_args + args) + if not cl.GetIssue(): + cl.SetIssue(issue) + cl.SetPatchset(patchset) + + +def CmdDCommit(args): + parser = optparse.OptionParser( + usage='git cl dcommit [options] [git-svn branch to apply against]') + parser.add_option('--bypass-hooks', action='store_true', dest='bypass_hooks', + help='bypass upload presubmit hook') + parser.add_option('-m', dest='message', + help="override review description") + parser.add_option('-f', action='store_true', dest='force', + help="force yes to questions (don't prompt)") + parser.add_option('-c', dest='contributor', + help="external contributor for patch (appended to " + + "description)") + parser.add_option('--tbr', action='store_true', dest='tbr', + help="short for 'to be reviewed', commit branch " + + "even without uploading for review") + (options, args) = parser.parse_args(args) + + cl = Changelist() + + if not args: + # Default to merging against our best guess of the upstream branch. + args = [cl.GetUpstreamBranch()] + + base_branch = args[0] + + if RunGit(['diff-index', 'HEAD']): + print 'Cannot dcommit with a dirty tree. You must commit locally first.' + return 1 + + # This rev-list syntax means "show all commits not in my branch that + # are in base_branch". + upstream_commits = RunGit(['rev-list', '^' + cl.GetBranchRef(), + base_branch]).splitlines() + if upstream_commits: + print ('Base branch "%s" has %d commits ' + 'not in this branch.' % (base_branch, len(upstream_commits))) + print 'Run "git merge %s" before attempting to dcommit.' % base_branch + return 1 + + if not options.force and not options.bypass_hooks: + RunHook(PREDCOMMIT_HOOK, upstream_branch=base_branch, error_ok=False) + + # Check the tree status if the tree status URL is set. + status = GetTreeStatus() + if 'closed' == status: + print ('The tree is closed. Please wait for it to reopen. Use ' + '"git cl dcommit -f" to commit on a closed tree.') + return 1 + elif 'unknown' == status: + print ('Unable to determine tree status. Please verify manually and ' + 'use "git cl dcommit -f" to commit on a closed tree.') + + description = options.message + if not options.tbr: + # It is important to have these checks early. Not only for user + # convenience, but also because the cl object then caches the correct values + # of these fields even as we're juggling branches for setting up the commit. + if not cl.GetIssue(): + print 'Current issue unknown -- has this branch been uploaded?' + print 'Use --tbr to commit without review.' + return 1 + + if not description: + description = cl.GetDescription() + + if not description: + print 'No description set.' + print 'Visit %s/edit to set it.' % (cl.GetIssueURL()) + return 1 + + description += "\n\nReview URL: %s" % cl.GetIssueURL() + else: + # Submitting TBR. Get a description now. + if not description: + description = UserEditedLog('TBR: ') + + if not description: + print "Description empty; aborting." + return 1 + + if options.contributor: + description += "\nPatch from %s." % options.contributor + print 'Description:', repr(description) + + branches = [base_branch, cl.GetBranchRef()] + if not options.force: + subprocess.call(['git', 'diff', '--stat'] + branches) + raw_input("About to commit; enter to confirm.") + + # We want to squash all this branch's commits into one commit with the + # proper description. 
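+  # (git-svn turns every git commit into a separate SVN revision, so
+  # squashing also keeps the change to a single SVN revision.)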
+  # We do this by doing a "merge --squash" into a new commit branch, then
+  # dcommitting that.
+  MERGE_BRANCH = 'git-cl-commit'
+  # Delete the merge branch if it already exists.
+  if RunGit(['show-ref', '--quiet', '--verify', 'refs/heads/' + MERGE_BRANCH],
+            exit_code=True) == 0:
+    RunGit(['branch', '-D', MERGE_BRANCH])
+
+  # We might be in a directory that's present in this branch but not in the
+  # trunk. Move up to the top of the tree so that git commands that expect a
+  # valid CWD won't fail after we check out the merge branch.
+  rel_base_path = RunGit(['rev-parse', '--show-cdup']).strip()
+  if rel_base_path:
+    os.chdir(rel_base_path)
+
+  # Stuff our change into the merge branch.
+  # We wrap in a try...finally block so if anything goes wrong,
+  # we clean up the branches.
+  try:
+    RunGit(['checkout', '-q', '-b', MERGE_BRANCH, base_branch])
+    RunGit(['merge', '--squash', cl.GetBranchRef()])
+    RunGit(['commit', '-m', description])
+    # dcommit the merge branch.
+    output = RunGit(['svn', 'dcommit', '--no-rebase'])
+  finally:
+    # And then swap back to the original branch and clean up.
+    RunGit(['checkout', '-q', cl.GetBranch()])
+    RunGit(['branch', '-D', MERGE_BRANCH])
+
+  if cl.has_issue and output.find("Committed r") != -1:
+    print "Closing issue (you may be prompted for your codereview password)..."
+    viewvc_url = settings.GetViewVCUrl()
+    if viewvc_url:
+      revision = re.compile(".*?\nCommitted r(\d+)",
+                            re.DOTALL).match(output).group(1)
+      cl.description = (cl.description +
+          "\n\nCommitted: " + viewvc_url + revision)
+    cl.CloseIssue()
+    cl.SetIssue(0)
+
+
+def CmdPatch(args):
+  parser = optparse.OptionParser(usage=('git cl patch [options] ' '<patch url or issue id>'))
+  parser.add_option('-b', dest='newbranch',
+                    help='create a new branch off trunk for the patch')
+  parser.add_option('-f', action='store_true', dest='force',
+                    help='with -b, clobber any existing branch')
+  parser.add_option('--reject', action='store_true', dest='reject',
+                    help='allow failed patches and spew .rej files')
+  parser.add_option('-n', '--no-commit', action='store_true', dest='nocommit',
+                    help="don't commit after patch applies")
+  (options, args) = parser.parse_args(args)
+  if len(args) != 1:
+    return parser.print_help()
+  input = args[0]
+
+  if re.match(r'\d+', input):
+    # Input is an issue id. Figure out the URL.
+    issue = input
+    fetch = "curl --silent http://%s/%s" % (settings.GetServer(), issue)
+    grep = "grep -E -o '/download/issue[0-9]+_[0-9]+.diff'"
+    pipe = subprocess.Popen("%s | %s" % (fetch, grep), shell=True,
+                            stdout=subprocess.PIPE)
+    path = pipe.stdout.read().strip()
+    url = 'http://%s%s' % (settings.GetServer(), path)
+  else:
+    # Assume it's a URL to the patch.
+    match = re.match(r'http://.*?/issue(\d+)_\d+.diff', input)
+    if match:
+      issue = match.group(1)
+      url = input
+    else:
+      print "Must pass an issue ID or full URL for 'Download raw patch set'"
+      return 1
+
+  if options.newbranch:
+    if options.force:
+      RunGit(['branch', '-D', options.newbranch], error_ok=True)
+    RunGit(['checkout', '-b', options.newbranch])
+
+  # Switch up to the top-level directory, if necessary, in preparation for
+  # applying the patch.
+  top = RunGit(['rev-parse', '--show-cdup']).strip()
+  if top:
+    os.chdir(top)
+
+  # Construct a pipeline to feed the patch into "git apply".
+  # We use "git apply" to apply the patch instead of "patch" so that we can
+  # pick up file adds.
+  # 1) Fetch the patch.
+  fetch = "curl --silent %s" % url
+  # 2) Munge the patch.
+  # Git patches have a/ at the beginning of source paths. We strip that out
+  # with a sed script rather than the -p flag to patch so we can feed either
+  # Git or svn-style patches into the same apply command.
+  gitsed = "sed -e 's|^--- a/|--- |; s|^+++ b/|+++ |'"
+  # 3) Apply the patch.
+  # The --index flag means: also insert into the index (so we catch adds).
+  apply = "git apply --index -p0"
+  if options.reject:
+    apply += " --reject"
+  subprocess.check_call(' | '.join([fetch, gitsed, apply]), shell=True)
+
+  # If we had an issue, commit the current state and register the issue.
+  if not options.nocommit:
+    RunGit(['commit', '-m', 'patch from issue %s' % issue])
+    cl = Changelist()
+    cl.SetIssue(issue)
+    print "Committed patch."
+  else:
+    print "Patch applied to index."
+
+def CmdRebase(args):
+  # Provide a wrapper for git svn rebase to help avoid accidental
+  # git svn dcommit.
+  RunGit(['svn', 'rebase'], redirect_stdout=False)
+
+def GetTreeStatus():
+  """Fetches the tree status and returns either 'open', 'closed',
+  'unknown' or 'unset'."""
+  url = settings.GetTreeStatusUrl(error_ok=True)
+  if url:
+    status = urllib2.urlopen(url).read().lower()
+    if status.find('closed') != -1 or status == '0':
+      return 'closed'
+    elif status.find('open') != -1 or status == '1':
+      return 'open'
+    return 'unknown'
+
+  return 'unset'
+
+def CmdTreeStatus(args):
+  status = GetTreeStatus()
+  if 'unset' == status:
+    print 'You must configure your tree status URL by running "git cl config".'
+    return 2
+
+  print "The tree is %s" % status
+  if status != 'open':
+    return 1
+  return 0
+
+def CmdUpstream(args):
+  cl = Changelist()
+  print cl.GetUpstreamBranch()
+
+COMMANDS = [
+  ('config', 'edit configuration for this tree', CmdConfig),
+  ('dcommit', 'commit the current changelist via git-svn', CmdDCommit),
+  ('issue', 'show/set current branch\'s issue number', CmdIssue),
+  ('patch', 'patch in a code review', CmdPatch),
+  ('presubmit', 'run presubmit tests on the current changelist', CmdPresubmit),
+  ('rebase', 'rebase current branch on top of svn repo', CmdRebase),
+  ('status', 'show status of changelists', CmdStatus),
+  ('tree', 'show the status of the tree', CmdTreeStatus),
+  ('upload', 'upload the current changelist to codereview', CmdUpload),
+  ('upstream', 'print the name of the upstream branch, if any', CmdUpstream),
+]
+
+
+def Usage(name):
+  print 'usage: %s <command>' % name
+  print 'commands are:'
+  for name, desc, _ in COMMANDS:
+    print ' %-10s %s' % (name, desc)
+  sys.exit(1)
+
+
+def main(argv):
+  if len(argv) < 2:
+    Usage(argv[0])
+
+  command = argv[1]
+  for name, _, func in COMMANDS:
+    if name == command:
+      return func(argv[2:])
+  print 'unknown command: %s' % command
+  Usage(argv[0])
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))
diff --git a/utils/prep_dist.sh b/utils/prep_dist.sh
new file mode 100755
index 0000000..33d344a
--- /dev/null
+++ b/utils/prep_dist.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Script that prepares the codebase for building a binary distribution
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +EXIT_FAILURE=1; +EXIT_SUCCESS=0; + +# Remove support for hachoir which is GPLv2 and cannot be distributed +# in binary form. Leave the formatter because it does not link in the +# hachoir code. + +rm -f plaso/parsers/hachoir* + +sed -i"~" -e '/import hachoir/d' plaso/parsers/__init__.py + +SED_SCRIPT=" +/_slow': \[/ { +:loop + /'\],/ !{ + N + b loop + } + d +}"; + +sed -i"~" -e "${SED_SCRIPT}" plaso/frontend/presets.py + +sed -i"~" '/hachoir_/,/^$/d' utils/check_dependencies.py + +exit ${EXIT_SUCCESS}; + diff --git a/utils/pylintrc b/utils/pylintrc new file mode 100644 index 0000000..5c5311e --- /dev/null +++ b/utils/pylintrc @@ -0,0 +1,289 @@ +# File copied from: +# http://src.chromium.org/chrome/trunk/tools/depot_tools/pylintrc +# Date: 2013-06-29. +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Profiled execution. +profile=no + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + + +[MESSAGES CONTROL] + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). +# CHANGED: +# C0103: Invalid name "" +# C0111: Missing docstring +# C0302: Too many lines in module (N) +# +# F0401: Unable to import 'module' +# pylint acting strangely: plaso/lib/event.py: F0401: 26,0: Unable to import 'google.protobuf' +# +# I0010: Unable to consider inline option '' +# I0011: Locally disabling WNNNN +# +# R0201: Method could be a function +# R0801: Similar lines in N files +# R0901: Too many ancestors (8/7) +# R0902: Too many instance attributes (N/7) +# R0903: Too few public methods (N/2) +# R0904: Too many public methods (N/20) +# R0911: Too many return statements (N/6) +# R0912: Too many branches (N/12) +# R0913: Too many arguments (N/5) +# R0914: Too many local variables (N/15) +# R0915: Too many statements (N/50) +# R0921: Abstract class not referenced +# R0922: Abstract class is only referenced 1 times +# R0924: Badly implemented Container, implements __len__ but not __getitem__ (incomplete-protocol) (pylint 0.26 and later) +# W0122: Use of the exec statement +# W0141: Used builtin function '' +# W0142: Used * or ** magic +# W0402: Uses of a deprecated module 'string' +# W0404: 41: Reimport 'XX' (imported line NN) +# W0511: TODO +# W0603: Using the global statement +# W0703: Catch "Exception" +# W1201: Specify string format arguments as logging function parameters +# W0201: Variables defined initially outside the scope of __init__ (reconsider this, added by Kristinn). +disable=C0103,C0111,C0302,F0401,I0010,I0011,R0201,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,R0924,W0122,W0141,W0142,W0402,W0404,W0511,W0603,W0703,W1201,W0201 + + +[REPORTS] + +# Set the output format. 
Available formats are text, parseable, colorized, msvs +# (visual studio) and html +output-format=text + +# Include message's id in output +include-ids=yes + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells whether to display a full report or only the messages +# CHANGED: +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Add a comment according to your evaluation note. This is used by the global +# evaluation report (RP0004). +comment=no + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the beginning of the name of unused variables. +# By default this is _ and dummy but we prefer _ and unused. +dummy-variables-rgx=_|unused + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). +ignored-classes=SQLObject,twisted.internet.reactor,hashlib,google.appengine.api.memcache + +# When zope mode is activated, add a predefined set of Zope acquired attributes +# to generated-members. +zope=no + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E0201 when accessed. Python regular +# expressions are accepted. +generated-members=REQUEST,acl_users,aq_parent,multiprocessing.managers.SyncManager + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). 
+# CHANGED: +indent-string=' ' + + +[BASIC] + +# Required attributes for module, separated by a comma +required-attributes= + +# List of builtins function names that should not be used, separated by a comma +bad-functions=map,filter,apply,input + +# Regular expression which should only match correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression which should only match correct module level names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression which should only match correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression which should only match correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct instance attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct list comprehension / +# generator expression variable names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Regular expression which should only match functions or classes name which do +# not require a docstring +no-docstring-rgx=__.*__ + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branchs=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + + +[CLASSES] + +# List of interface methods to ignore, separated by a comma. This is used for +# instance to not check methods defines in Zope's Interface base class. +ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,string,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. 
internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/utils/pylintrc-1.1.0 b/utils/pylintrc-1.1.0 new file mode 100644 index 0000000..82cd8a1 --- /dev/null +++ b/utils/pylintrc-1.1.0 @@ -0,0 +1,288 @@ +# File copied from: +# http://src.chromium.org/chrome/trunk/tools/depot_tools/pylintrc +# Date: 2013-06-29. +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Profiled execution. +profile=no + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + + +[MESSAGES CONTROL] + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). +# CHANGED: +# C0103: Invalid name "" +# C0111: Missing docstring +# C0302: Too many lines in module (N) +# +# F0401: Unable to import 'module' +# pylint acting strangely: plaso/lib/event.py: F0401: 26,0: Unable to import 'google.protobuf' +# +# I0010: Unable to consider inline option '' +# I0011: Locally disabling WNNNN +# +# R0201: Method could be a function +# R0801: Similar lines in N files +# R0901: Too many ancestors (8/7) +# R0902: Too many instance attributes (N/7) +# R0903: Too few public methods (N/2) +# R0904: Too many public methods (N/20) +# R0911: Too many return statements (N/6) +# R0912: Too many branches (N/12) +# R0913: Too many arguments (N/5) +# R0914: Too many local variables (N/15) +# R0915: Too many statements (N/50) +# R0921: Abstract class not referenced +# R0922: Abstract class is only referenced 1 times +# W0122: Use of the exec statement +# W0141: Used builtin function '' +# W0142: Used * or ** magic +# W0402: Uses of a deprecated module 'string' +# W0404: 41: Reimport 'XX' (imported line NN) +# W0511: TODO +# W0603: Using the global statement +# W0703: Catch "Exception" +# W1201: Specify string format arguments as logging function parameters +# W0201: Variables defined initially outside the scope of __init__ (reconsider this, added by Kristinn). +disable=C0103,C0111,C0302,F0401,I0010,I0011,R0201,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,W0122,W0141,W0142,W0402,W0404,W0511,W0603,W0703,W1201,W0201 + + +[REPORTS] + +# Set the output format. 
Available formats are text, parseable, colorized, msvs +# (visual studio) and html +output-format=text + +# Include message's id in output +include-ids=yes + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells whether to display a full report or only the messages +# CHANGED: +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Add a comment according to your evaluation note. This is used by the global +# evaluation report (RP0004). +comment=no + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the beginning of the name of unused variables. +# By default this is _ and dummy but we prefer _ and unused. +dummy-variables-rgx=_|unused + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). +ignored-classes=SQLObject,twisted.internet.reactor,hashlib,google.appengine.api.memcache + +# When zope mode is activated, add a predefined set of Zope acquired attributes +# to generated-members. +zope=no + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E0201 when accessed. Python regular +# expressions are accepted. +generated-members=REQUEST,acl_users,aq_parent,multiprocessing.managers.SyncManager + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). 
+# CHANGED: +indent-string=' ' + + +[BASIC] + +# Required attributes for module, separated by a comma +required-attributes= + +# List of builtins function names that should not be used, separated by a comma +bad-functions=map,filter,apply,input + +# Regular expression which should only match correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression which should only match correct module level names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression which should only match correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression which should only match correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct instance attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct list comprehension / +# generator expression variable names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Regular expression which should only match functions or classes name which do +# not require a docstring +no-docstring-rgx=__.*__ + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branchs=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + + +[CLASSES] + +# List of interface methods to ignore, separated by a comma. This is used for +# instance to not check methods defines in Zope's Interface base class. +ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,string,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. 
internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=Exception
diff --git a/utils/review.sh b/utils/review.sh
new file mode 100755
index 0000000..ee80212
--- /dev/null
+++ b/utils/review.sh
@@ -0,0 +1,165 @@
+#!/bin/bash
+# A small script that submits code for code review.
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+EXIT_FAILURE=1;
+EXIT_MISSING_ARGS=2;
+EXIT_SUCCESS=0;
+
+SCRIPTNAME=`basename $0`;
+
+BROWSER_PARAM="";
+CACHE_PARAM="";
+USE_CL_FILE=1;
+
+while test $# -gt 0;
+do
+  case $1 in
+  --nobrowser | --no-browser | --no_browser )
+    BROWSER_PARAM="--no_oauth2_webbrowser";
+    shift;
+    ;;
+
+  --noclfile | --no-clfile | --no_clfile )
+    USE_CL_FILE=0;
+    shift;
+    ;;
+
+  *)
+    REVIEWER=$1;
+    shift
+    ;;
+  esac
+done
+
+if test -z "${REVIEWER}";
+then
+  echo "Usage: ./${SCRIPTNAME} [--nobrowser] [--noclfile] REVIEWER";
+  echo "";
+  echo "  REVIEWER: the email address of the reviewer that is registered with:"
+  echo "            https://codereview.appspot.com";
+  echo "";
+
+  exit ${EXIT_MISSING_ARGS};
+fi
+
+if ! test -f "utils/common.sh";
+then
+  echo "Missing common functions, are you in the wrong directory?";
+  exit ${EXIT_FAILURE};
+fi
+
+. utils/common.sh
+
+# Check for double status codes, upload.py cannot handle these correctly.
+STATUS_CODES=`git status -s | cut -b1,2 | grep '\S\S' | grep -v '??' | sort | uniq`;
+
+if ! test -z "${STATUS_CODES}";
+then
+  echo "Upload aborted - detected double git status codes."
+  echo "Run: 'git stash && git stash pop'.";
+
+  exit ${EXIT_FAILURE};
+fi
+
+# Check if the linting is correct.
+if ! linter;
+then
+  echo "Upload aborted - fix the issues reported by the linter.";
+
+  exit ${EXIT_FAILURE};
+fi
+
+# Check if all the tests pass.
+if test -e run_tests.py;
+then
+  echo "Running tests.";
+  python run_tests.py
+
+  if test $? -ne 0;
+  then
+    echo "Upload aborted - fix the issues reported by the failing test.";
+
+    exit ${EXIT_FAILURE};
+  fi
+fi
+
+MISSING_TESTS="";
+FILES=`git status -s | grep -v "^?" | awk '{if ($1 != "D") { print $2;}}' | grep "\.py$" | grep -v "_test.py$"`
+for CHANGED_FILE in ${FILES};
+do
+  TEST_FILE=`echo ${CHANGED_FILE} | sed -e 's/\.py//g'`
+  if !
test -f "${TEST_FILE}_test.py"; + then + MISSING_TESTS="${MISSING_TESTS} + ${CHANGED_FILE}" + fi +done + +if test -z "${MISSING_TESTS}"; +then + MISSING_TEST_FILES="."; +else + MISSING_TEST_FILES="These files are missing unit tests: +${MISSING_TESTS} + "; +fi + +echo -n "Short description of code review request: "; +read DESCRIPTION +TEMP_FILE=`mktemp .tmp_plaso_code_review.XXXXXX`; + +# Check if we need to set --cache. +STATUS_CODES=`git status -s | cut -b1,2 | sed 's/\s//g' | sort | uniq`; + +for STATUS_CODE in ${STATUS_CODES}; +do + if test "${STATUS_CODE}" = "A"; + then + CACHE_PARAM="--cache"; + fi +done + +if ! test -z "${BROWSER_PARAM}"; +then + echo "You need to visit: https://codereview.appspot.com/get-access-token"; + echo "and copy+paste the access token to the window (no prompt)"; +fi + +python utils/upload.py \ + --oauth2 ${BROWSER_PARAM} -y ${CACHE_PARAM} \ + -r ${REVIEWER} --cc log2timeline-dev@googlegroups.com \ + -m "${MISSING_TEST_FILES}" -t "${DESCRIPTION}" \ + --send_mail | tee ${TEMP_FILE}; + +CL=`cat ${TEMP_FILE} | grep codereview.appspot.com | awk -F '/' '/created/ {print $NF}'`; +cat ${TEMP_FILE}; +rm -f ${TEMP_FILE}; + +echo ""; + +if test -z ${CL}; +then + echo "Unable to upload code change for review."; + exit ${EXIT_FAILURE}; + +elif test ${USE_CL_FILE} -ne 0; +then + echo ${CL} > ._code_review_number; + echo "Code review number: ${CL} is saved, so no need to include that in future updates/submits."; +fi + +exit ${EXIT_SUCCESS}; diff --git a/utils/run_linter.sh b/utils/run_linter.sh new file mode 100755 index 0000000..b503473 --- /dev/null +++ b/utils/run_linter.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# A small script that runs the linter on all files. +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +EXIT_FAILURE=1; +EXIT_SUCCESS=0; + +if ! test -f "utils/common.sh" ; +then + echo "Missing common functions, are you in the wrong directory?"; + + exit ${EXIT_FAILURE}; +fi + +. utils/common.sh + +if ! linter; +then + echo "Aborted - fix the issues reported by the linter."; + + exit ${EXIT_FAILURE}; +fi + +exit ${EXIT_SUCCESS}; + diff --git a/utils/run_tests.sh b/utils/run_tests.sh new file mode 100755 index 0000000..01e0098 --- /dev/null +++ b/utils/run_tests.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# A small script that runs all tests +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+EXIT_FAILURE=1;
+EXIT_SUCCESS=0;
+
+COVERAGE="/usr/bin/coverage";
+COVERAGE_REPORT="tests-coverage.txt";
+PYTHON="/usr/bin/python";
+
+if ! test -x "${PYTHON}";
+then
+  # MSYS-MinGW allows the script to run using the Windows Python version.
+  PYTHON="/c/python27/python.exe";
+fi
+
+if ! test -x "${PYTHON}";
+then
+  echo "Unable to locate Python interpreter."
+  echo "";
+  exit ${EXIT_FAILURE};
+fi
+
+if test -x "${COVERAGE}";
+then
+  rm -f .coverage ${COVERAGE_REPORT};
+fi
+
+# Run the tests in a specific order.
+SUBDIRS="lib serializer winreg filters classifier engine events preprocessors parsers output analysis multi_processing frontend";
+
+for SUBDIR in ${SUBDIRS};
+do
+  TEST_FILES=`find "plaso/${SUBDIR}" -name "*_test.py" | grep -v "\/build\/"`;
+
+  for TEST_FILE in ${TEST_FILES};
+  do
+    if test ${TEST_FILE} = "plaso/parsers/pcap_test.py";
+    then
+      continue;
+    fi
+
+    echo "---+ ${TEST_FILE} +---"
+
+    if test -x "${COVERAGE}";
+    then
+      PYTHONPATH=. ${COVERAGE} run -a ${TEST_FILE};
+    else
+      PYTHONPATH=. ${PYTHON} ${TEST_FILE};
+    fi
+
+    if test $? -ne 0;
+    then
+      echo "TEST FAILED: ${TEST_FILE}.";
+      echo "";
+      echo "Stopping further testing.";
+      echo "";
+      exit ${EXIT_FAILURE};
+    fi
+    echo "";
+  done
+done
+
+if test -x "${COVERAGE}";
+then
+  echo "Writing tests coverage report: ${COVERAGE_REPORT}";
+  SITE_PACKAGES="/usr/lib/python2.7/site-packages";
+  ${COVERAGE} report -m --omit="${SITE_PACKAGES}/*,*_test.py" > ${COVERAGE_REPORT};
+
+  rm -f .coverage
+fi
+
+exit ${EXIT_SUCCESS};
+
diff --git a/utils/submit.sh b/utils/submit.sh
new file mode 100755
index 0000000..b5f13a8
--- /dev/null
+++ b/utils/submit.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# A small script that submits code for code review.
+#
+# Copyright 2012 The Plaso Project Authors.
+# Please see the AUTHORS file for details on individual authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+EXIT_FAILURE=1;
+EXIT_MISSING_ARGS=2;
+EXIT_SUCCESS=0;
+
+SCRIPTNAME=`basename $0`;
+
+BROWSER_PARAM="";
+CACHE_PARAM="";
+CL_NUMBER="";
+USE_CL_FILE=0;
+
+while test $# -gt 0;
+do
+  case $1 in
+  --nobrowser | --no-browser | --no_browser )
+    BROWSER_PARAM="--no_oauth2_webbrowser";
+    shift;
+    ;;
+
+  *)
+    CL_NUMBER=$1;
+    shift
+    ;;
+  esac
+done
+
+if test -z "${CL_NUMBER}";
+then
+  if test -f ._code_review_number;
+  then
+    CL_NUMBER=`cat ._code_review_number`
+    RESULT=`echo ${CL_NUMBER} | sed -e 's/[0-9]//g'`;
+
+    if ! test -z "${RESULT}";
+    then
+      echo "File ._code_review_number exists but contains an incorrect CL number.";
+
+      exit ${EXIT_FAILURE};
+    fi
+    USE_CL_FILE=1;
+  fi
+fi
+
+if test -z "${CL_NUMBER}";
+then
+  echo "Usage: ./${SCRIPTNAME} [--nobrowser] CL_NUMBER";
+  echo "";
+  echo "  CL_NUMBER: optional change list (CL) number that is to be submitted.";
+  echo "             If no CL number is provided the value is read from:";
+  echo "             ._code_review_number";
+  echo "";
+
+  exit ${EXIT_MISSING_ARGS};
+fi
+
+if ! test -f "utils/common.sh";
test -f "utils/common.sh"; +then + echo "Unable to find common functions, are you in the wrong directory?"; + + exit ${EXIT_FAILURE}; +fi + +# Source the common library. +. utils/common.sh + +# Check if we're on the master branch. +BRANCH=`git branch | grep -e "^[*]" | sed "s/^[*] //"`; + +if test "${BRANCH}" != "master"; +then + echo "Submit aborted - current branch is not master."; + + exit ${EXIT_FAILURE}; +fi + +# Check for double status codes, upload.py cannot handle these correctly. +STATUS_CODES=`git status -s | cut -b1,2 | grep '\S\S' | grep -v '??' | sort | uniq`; + +if ! test -z "${STATUS_CODES}"; +then + echo "Submit aborted - detected double git status codes." + echo "Run: 'git stash && git stash pop'."; + + exit ${EXIT_FAILURE}; +fi + +# Check if the local repo is in sync with the origin. +git fetch + +if test $? -ne 0; +then + echo "Submit aborted - unable to fetch updates from origin repo"; + + exit ${EXIT_FAILURE}; +fi + +NUMBER_OF_CHANGES=`git log HEAD..origin/master --oneline | wc -l`; + +if test $? -ne 0; +then + echo "Submit aborted - unable to determine if local repo is in sync with origin"; + + exit ${EXIT_FAILURE}; +fi + +if test ${NUMBER_OF_CHANGES} -ne 0; +then + echo "Submit aborted - local repo out of sync with origin." + echo "Run: 'git stash && git pull && git stash pop'."; + + exit ${EXIT_FAILURE}; +fi + +# Check if the linting is correct. +if ! linter; +then + echo "Submit aborted - fix the issues reported by the linter."; + + exit ${EXIT_FAILURE}; +fi + +# Check if all the tests pass. +if test -e run_tests.py; +then + echo "Running tests."; + python run_tests.py + + if test $? -ne 0; + then + echo "Submit aborted - fix the issues reported by the failing test."; + + exit ${EXIT_FAILURE}; + fi +fi + +URL_CODEREVIEW="https://codereview.appspot.com"; + +# Get the description of the change list. +RESULT=`which json_xs`; + +# TODO: check if curl exists. +if ! test -z "${RESULT}"; +then + DESCRIPTION=`curl -s ${URL_CODEREVIEW}/api/${CL_NUMBER} | json_xs | grep '"subject"' | awk -F '"' '{print $(NF-1)}'`; +else + DESCRIPTION=`curl ${URL_CODEREVIEW}/${CL_NUMBER}/ -s | grep "Issue ${CL_NUMBER}" | awk -F ':' '{print $2}' | tail -1`; +fi + +if test -z "${DESCRIPTION}"; +then + echo "Submit aborted - unable to find change list with number: ${CL_NUMBER}."; + + exit ${EXIT_FAILURE}; +fi + +# Update the version information. +echo "Updating version information to match today's date." +DATE_NOW=`date +"%Y%m%d"` +sed -i -e "s/^VERSION_DATE.*$/VERSION_DATE = '${DATE_NOW}'/g" plaso/__init__.py + +COMMIT_DESCRIPTION="Code review: ${CL_NUMBER}: ${DESCRIPTION}"; +echo "Submitting ${COMMIT_DESCRIPTION}"; + +# Check if we need to set --cache. +STATUS_CODES=`git status -s | cut -b1,2 | sed 's/\s//g' | sort | uniq`; + +for STATUS_CODE in ${STATUS_CODES}; +do + if test "${STATUS_CODE}" = "A"; + then + CACHE_PARAM="--cache"; + fi +done + +python utils/upload.py \ + --oauth2 ${BROWSER_PARAM} -y -i ${CL_NUMBER} ${CACHE_PARAM} \ + -t "Submitted." -m "Code Submitted." --send_mail + +git commit -a -m "${COMMIT_DESCRIPTION}"; +git push + +if test -f "~/codereview_upload_cookies"; +then + curl -b ~/.codereview_upload_cookies ${URL_CODEREVIEW}/${CL_NUMBER}/close -d '' +else + echo "Could not find an authenticated session to codereview. You need to" + echo "manually close the ticket on the code review site." +fi + +if ! 
test -z "${USE_CL_FILE}" && test -f "._code_review_number"; +then + rm -f ._code_review_number +fi + +exit ${EXIT_SUCCESS}; diff --git a/utils/update.sh b/utils/update.sh new file mode 100755 index 0000000..a432567 --- /dev/null +++ b/utils/update.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# A small script that updates a change list for code review. +# +# Copyright 2012 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +EXIT_FAILURE=1; +EXIT_MISSING_ARGS=2; +EXIT_SUCCESS=0; + +SCRIPTNAME=`basename $0`; + +BROWSER_PARAM=""; +CACHE_PARAM=""; +CL_NUMBER=""; + +while test $# -gt 0; +do + case $1 in + --nobrowser | --no-browser | --no_browser ) + BROWSER_PARAM="--no_oauth2_webbrowser"; + shift; + ;; + + *) + CL_NUMBER=$1; + shift + ;; + esac +done + +if test -z "${CL_NUMBER}"; +then + if test -f ._code_review_number; + then + CL_NUMBER=`cat ._code_review_number` + RESULT=`echo ${CL_NUMBER} | sed -e 's/[0-9]//g'`; + + if ! test -z "${RESULT}"; + then + echo "File ._code_review_number exists but contains an incorrect CL number."; + + exit ${EXIT_FAILURE}; + fi + fi +fi + +if test -z "${CL_NUMBER}"; +then + echo "Usage: ./${SCRIPTNAME} [--nobrowser] [CL_NUMBER]"; + echo ""; + echo " CL_NUMBER: optional change list (CL) number that is to be updated."; + echo " If no CL number is provided the value is read from:"; + echo " ._code_review_number"; + echo ""; + + exit ${EXIT_MISSING_ARGS}; +fi + +if [ ! -f "utils/common.sh" ]; +then + echo "Missing common functions, are you in the wrong directory?"; + + exit ${EXIT_FAILURE}; +fi + +. utils/common.sh + +# Check for double status codes, upload.py cannot handle these correctly. +STATUS_CODES=`git status -s | cut -b1,2 | grep '\S\S' | grep -v '??' | sort | uniq`; + +if ! test -z "${STATUS_CODES}"; +then + echo "Update aborted - detected double git status codes." + echo "Run: 'git stash && git stash pop'."; + + exit ${EXIT_FAILURE}; +fi + +# Check if the linting is correct. +if ! linter; +then + echo "Update aborted - fix the issues reported by the linter."; + + exit ${EXIT_FAILURE}; +fi + +# Check if all the tests pass. +if test -e run_tests.py; +then + echo "Running tests."; + python run_tests.py + + if test $? -ne 0; + then + echo "Update aborted - fix the issues reported by the failing test."; + + exit ${EXIT_FAILURE}; + fi +fi + +# Check if we need to set --cache. +STATUS_CODES=`git status -s | cut -b1,2 | sed 's/\s//g' | sort | uniq`; + +for STATUS_CODE in ${STATUS_CODES}; +do + if test "${STATUS_CODE}" = "A"; + then + CACHE_PARAM="--cache"; + fi +done + +python utils/upload.py \ + --oauth2 ${BROWSER_PARAM} -y -i ${CL_NUMBER} ${CACHE_PARAM} \ + -t "Uploading changes made to code." 
-m "Code updated."; + +exit ${EXIT_SUCCESS}; diff --git a/utils/update_dependencies.py b/utils/update_dependencies.py new file mode 100755 index 0000000..62dc3d4 --- /dev/null +++ b/utils/update_dependencies.py @@ -0,0 +1,656 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright 2014 The Plaso Project Authors. +# Please see the AUTHORS file for details on individual authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Script to update prebuilt versions of the dependencies.""" + +import argparse +import glob +import logging +import os +import platform +import re +import subprocess +import sys +import urllib2 + +if platform.system() == 'Windows': + import wmi + + +class DownloadHelper(object): + """Class that helps in downloading a project.""" + + def __init__(self): + """Initializes the build helper.""" + super(DownloadHelper, self).__init__() + self._cached_url = u'' + self._cached_page_content = '' + + def DownloadPageContent(self, download_url): + """Downloads the page content from the URL and caches it. + + Args: + download_url: the URL where to download the page content. + + Returns: + The page content if successful, None otherwise. + """ + if not download_url: + return + + if self._cached_url != download_url: + url_object = urllib2.urlopen(download_url) + + if url_object.code != 200: + return + + self._cached_page_content = url_object.read() + self._cached_url = download_url + + return self._cached_page_content + + def DownloadFile(self, download_url): + """Downloads a file from the URL and returns the filename. + + The filename is extracted from the last part of the URL. + + Args: + download_url: the URL where to download the file. + + Returns: + The filename if successful also if the file was already downloaded + or None on error. + """ + _, _, filename = download_url.rpartition(u'/') + + if not os.path.exists(filename): + logging.info(u'Downloading: {0:s}'.format(download_url)) + + url_object = urllib2.urlopen(download_url) + if url_object.code != 200: + return + + file_object = open(filename, 'wb') + file_object.write(url_object.read()) + file_object.close() + + return filename + + +class GoogleCodeDownloadHelper(DownloadHelper): + """Class that helps in downloading a Google Code project.""" + + def GetGoogleCodeDownloadsUrl(self, project_name): + """Retrieves the Download URL from the Google Code project page. + + Args: + project_name: the name of the project. + + Returns: + The downloads URL or None on error. + """ + download_url = u'https://code.google.com/p/{0:s}/'.format(project_name) + + page_content = self.DownloadPageContent(download_url) + if not page_content: + return + + # The format of the project downloads URL is: + # https://googledrive.com/host/{random string}/ + expression_string = ( + u']*>Downloads') + matches = re.findall(expression_string, page_content) + + if not matches or len(matches) != 1: + return + + return matches[0] + + def GetPackageDownloadUrls(self, google_drive_url): + """Retrieves the package downloads URL for a given URL. 
+
+    Args:
+      google_drive_url: the Google Drive URL.
+
+    Returns:
+      A list of package download URLs.
+    """
+    page_content = self.DownloadPageContent(google_drive_url)
+    if not page_content:
+      return
+
+    # The format of the project download URL is:
+    # /host/{random string}/3rd%20party/{sub directory}/{filename}
+    expression_string = u'/host/[^/]+/3rd%20party/[^/">]+/[^">]+'
+    matches = re.findall(expression_string, page_content)
+
+    for match_index in range(0, len(matches)):
+      matches[match_index] = u'https://googledrive.com{0:s}'.format(
+          matches[match_index])
+
+    return matches
+
+  def Download(self, download_url):
+    """Downloads the project for a given project name and version.
+
+    Args:
+      download_url: the download URL.
+
+    Returns:
+      The filename if successful also if the file was already downloaded
+      or None on error.
+    """
+    return self.DownloadFile(download_url)
+
+
+def CompareVersions(first_version_list, second_version_list):
+  """Compares two lists containing version parts.
+
+  Note that the version parts can contain alpha numeric characters.
+
+  Args:
+    first_version_list: the first list of version parts.
+    second_version_list: the second list of version parts.
+
+  Returns:
+    1 if the first is larger than the second, -1 if the first is smaller than
+    the second, or 0 if the first and second are equal.
+  """
+  first_version_list_length = len(first_version_list)
+  second_version_list_length = len(second_version_list)
+
+  for index in range(0, first_version_list_length):
+    if index >= second_version_list_length:
+      return 1
+
+    if first_version_list[index] > second_version_list[index]:
+      return 1
+    elif first_version_list[index] < second_version_list[index]:
+      return -1
+
+  if first_version_list_length < second_version_list_length:
+    return -1
+
+  return 0
+
+
+def Main():
+  args_parser = argparse.ArgumentParser(description=(
+      u'Installs the latest versions of plaso dependencies.'))
+
+  args_parser.add_argument(
+      '-f', '--force', dest='force_install', action='store_true',
+      default=False, help=(
+          u'Force installation. This option removes existing versions '
+          u'of installed dependencies. The default behavior is to only '
+          u'install a dependency if no version or an older version is '
+          u'installed.'))
+
+  options = args_parser.parse_args()
+
+  operating_system = platform.system()
+  cpu_architecture = platform.machine().lower()
+  linux_name = None
+  sub_directory = None
+  noarch_sub_directory = None
+
+  if operating_system == u'Darwin':
+    # TODO: determine OSX version
+    if cpu_architecture != u'x86_64':
+      logging.error(u'CPU architecture: {0:s} not supported.'.format(
+          cpu_architecture))
+
+    # Note that the sub directory should be URL encoded.
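+    # For example, '%20' in 'macosx%2010.10' below is the URL encoded space.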
+    sub_directory = u'macosx%2010.10'
+
+  elif operating_system == u'Linux':
+    linux_name, linux_version, _ = platform.linux_distribution()
+    if linux_name == u'Fedora' and linux_version == u'20':
+      if cpu_architecture != u'x86_64':
+        logging.error(u'CPU architecture: {0:s} not supported.'.format(
+            cpu_architecture))
+
+      sub_directory = u'fedora20-x86_64'
+      noarch_sub_directory = u'fedora20-noarch'
+
+    elif linux_name == u'Ubuntu' and linux_version == u'12.04':
+      if cpu_architecture == u'i686':
+        sub_directory = u'ubuntu12.04-i386'
+        noarch_sub_directory = u'ubuntu12.04-all'
+
+      elif cpu_architecture == u'x86_64':
+        sub_directory = u'ubuntu12.04-amd64'
+        noarch_sub_directory = u'ubuntu12.04-all'
+
+      else:
+        logging.error(u'CPU architecture: {0:s} not supported.'.format(
+            cpu_architecture))
+
+    else:
+      logging.error(u'Linux variant: {0:s} {1:s} not supported.'.format(
+          linux_name, linux_version))
+
+  elif operating_system == u'Windows':
+    if cpu_architecture == u'x86':
+      sub_directory = u'win32-vs2008'
+
+    elif cpu_architecture == u'amd64':
+      sub_directory = u'win-amd64-vs2010'
+
+    else:
+      logging.error(u'CPU architecture: {0:s} not supported.'.format(
+          cpu_architecture))
+
+  else:
+    logging.error(u'Operating system: {0:s} not supported.'.format(
+        operating_system))
+    return False
+
+  download_helper = GoogleCodeDownloadHelper()
+  google_drive_url = download_helper.GetGoogleCodeDownloadsUrl(u'plaso')
+
+  package_urls = download_helper.GetPackageDownloadUrls(
+      u'{0:s}/3rd%20party/{1:s}'.format(google_drive_url, sub_directory))
+
+  if noarch_sub_directory:
+    noarch_package_urls = download_helper.GetPackageDownloadUrls(
+        u'{0:s}/3rd%20party/{1:s}'.format(
+            google_drive_url, noarch_sub_directory))
+
+    package_urls.extend(noarch_package_urls)
+
+  dependencies_directory = u'dependencies'
+  if not os.path.exists(dependencies_directory):
+    os.mkdir(dependencies_directory)
+
+  os.chdir(dependencies_directory)
+
+  package_filenames = {}
+  package_versions = {}
+  for package_url in package_urls:
+    _, _, package_filename = package_url.rpartition(u'/')
+    if package_filename.endswith(u'.deb'):
+      name, _, version = package_filename.partition(u'_')
+
+      # Ignore devel and tools DEB packages.
+      if name.endswith(u'-dev') or name.endswith(u'-tools'):
+        continue
+
+      if name.endswith(u'-python'):
+        package_prefix = name
+        name, _, _ = name.partition(u'-')
+      else:
+        package_prefix = u'{0:s}_'.format(name)
+      version, _, _ = version.partition(u'-')
+
+    elif package_filename.endswith(u'.dmg'):
+      name, _, version = package_filename.partition(u'-')
+      version, _, _ = version.partition(u'.dmg')
+      package_prefix = name
+
+    elif package_filename.endswith(u'.msi'):
+      name, _, version = package_filename.partition(u'-')
+      version, _, _ = version.partition(u'.win')
+      package_prefix = name
+
+    elif package_filename.endswith(u'.rpm'):
+      name, _, version = package_filename.partition(u'-')
+
+      # Ignore debuginfo, devel and tools RPM packages.
+      if (version.startswith(u'debuginfo') or version.startswith(u'devel') or
+          version.startswith(u'tools')):
+        continue
+
+      # Ignore the sleuthkit tools RPM package.
+      if name == u'sleuthkit' and not version.startswith(u'libs'):
+        continue
+
+      package_prefix, _, version = version.partition(u'-')
+      version, _, _ = version.partition(u'-')
+      package_prefix = u'{0:s}-{1:s}'.format(name, package_prefix)
+
+    else:
+      # Ignore all other file extensions.
+ continue + + version = version.split(u'.') + if name == u'pytsk': + last_part = version.pop() + version.extend(last_part.split(u'-')) + + if name not in package_versions: + compare_result = 1 + else: + compare_result = CompareVersions(version, package_versions[name]) + + if compare_result > 0: + package_filenames[name] = package_filename + package_versions[name] = version + + if not os.path.exists(package_filename): + filenames = glob.glob(u'{0:s}*'.format(package_prefix)) + for filename in filenames: + print u'Removing: {0:s}'.format(filename) + os.remove(filename) + + print u'Downloading: {0:s}'.format(package_filename) + _ = download_helper.Download(package_url) + + os.chdir(u'..') + + if operating_system == u'Darwin': + result = True + + command = u'/usr/sbin/pkgutil --packages' + print 'Running: "{0:s}"'.format(command) + process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) + if process.returncode is None: + packages, _ = process.communicate() + else: + packages = '' + + if process.returncode != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + return False + + for package_name in packages.split('\n'): + if not package_name: + continue + + if (package_name.startswith(u'com.github.libyal.') or + package_name.startswith(u'com.github.log2timeline.') or + package_name.startswith(u'com.github.sleuthkit.') or + package_name.startswith(u'com.google.code.p.') or + package_name.startswith(u'org.samba.') or + package_name.startswith(u'org.python.pypi.') or + package_name.startswith(u'net.sourceforge.projects.')): + + if package_name.startswith(u'com.github.libyal.'): + name = package_name[18:] + + elif package_name.startswith(u'com.github.log2timeline.'): + name = package_name[24:] + + elif package_name.startswith(u'com.github.sleuthkit.'): + name = package_name[21:] + + elif package_name.startswith(u'com.google.code.p.'): + name = package_name[18:] + + elif package_name.startswith(u'org.samba.'): + name = package_name[10:] + + elif package_name.startswith(u'org.python.pypi.'): + name = package_name[16:] + + elif package_name.startswith(u'net.sourceforge.projects.'): + name = package_name[25:] + + # Detect the PackageMaker naming convention. + if name.endswith(u'.pkg'): + _, _, sub_name = name[:-4].rpartition(u'.') + is_package_maker_pkg = True + else: + is_package_maker_pkg = False + name, _, _ = name.partition(u'.') + + if name in package_versions: + # Determine the package version. + command = u'/usr/sbin/pkgutil --pkg-info {0:s}'.format(package_name) + print 'Running: "{0:s}"'.format(command) + process = subprocess.Popen( + command, stdout=subprocess.PIPE, shell=True) + if process.returncode is None: + package_info, _ = process.communicate() + else: + package_info = '' + + if process.returncode != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + result = False + continue + + location = None + version = None + volume = None + for attribute in package_info.split('\n'): + if attribute.startswith(u'location: '): + _, _, location = attribute.rpartition(u'location: ') + + elif attribute.startswith(u'version: '): + _, _, version = attribute.rpartition(u'version: ') + + elif attribute.startswith(u'volume: '): + _, _, volume = attribute.rpartition(u'volume: ') + + version = version.split(u'.') + if options.force_install: + compare_result = -1 + elif name not in package_versions: + compare_result = 1 + # TODO: handle pytsk. 
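+            # (pytsk version numbers do not compare cleanly; see the Windows
+            # handling further down.)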
+          else:
+            compare_result = CompareVersions(version, package_versions[name])
+          if compare_result >= 0:
+            # The latest or newer version is already installed.
+            del package_versions[name]
+
+          if compare_result < 0:
+            # Determine the files in the package.
+            command = u'/usr/sbin/pkgutil --files {0:s}'.format(package_name)
+            print 'Running: "{0:s}"'.format(command)
+            process = subprocess.Popen(
+                command, stdout=subprocess.PIPE, shell=True)
+            if process.returncode is None:
+              package_files, _ = process.communicate()
+            else:
+              package_files = ''
+
+            if process.returncode != 0:
+              logging.error(u'Running: "{0:s}" failed.'.format(command))
+              result = False
+              continue
+
+            directories = []
+            files = []
+            for filename in package_files.split('\n'):
+              if is_package_maker_pkg:
+                filename = u'{0:s}{1:s}/{2:s}/{3:s}'.format(
+                    volume, location, sub_name, filename)
+              else:
+                filename = u'{0:s}{1:s}'.format(location, filename)
+
+              if os.path.isdir(filename):
+                directories.append(filename)
+              else:
+                files.append(filename)
+
+            print 'Removing: {0:s} {1:s}'.format(name, version)
+            for filename in files:
+              if os.path.exists(filename):
+                os.remove(filename)
+
+            for filename in directories:
+              if os.path.exists(filename):
+                try:
+                  os.rmdir(filename)
+                except OSError:
+                  # Ignore directories that are not empty.
+                  pass
+
+            command = u'/usr/sbin/pkgutil --forget {0:s}'.format(
+                package_name)
+            exit_code = subprocess.call(command, shell=True)
+            if exit_code != 0:
+              logging.error(u'Running: "{0:s}" failed.'.format(command))
+              result = False
+
+    if not result:
+      return False
+
+  elif operating_system == u'Windows':
+    connection = wmi.WMI()
+
+    query = u'SELECT Name FROM Win32_Product'
+    for product in connection.query(query):
+      name = getattr(product, 'Name', u'')
+      # Windows package names start with 'Python' or 'Python 2.7 '.
+      if name.startswith('Python '):
+        _, _, name = name.rpartition(u' ')
+        if name.startswith('2.7 '):
+          _, _, name = name.rpartition(u' ')
+
+        name, _, version = name.partition(u'-')
+
+        version = version.split(u'.')
+        if options.force_install:
+          compare_result = -1
+        elif name not in package_versions:
+          compare_result = 1
+        elif name == u'pytsk':
+          # We cannot really tell by the version number that pytsk needs to
+          # be updated. Just update it anyway.
+          compare_result = -1
+        else:
+          compare_result = CompareVersions(version, package_versions[name])
+        if compare_result >= 0:
+          # The latest or newer version is already installed.
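+          # Removing the entry here means the install loops further down
+          # never see this package; after this pass package_versions holds
+          # only packages that still need to be installed or upgraded.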
+          del package_versions[name]
+
+        if compare_result < 0:
+          print 'Removing: {0:s} {1:s}'.format(name, u'.'.join(version))
+          product.Uninstall()
+
+  result = True
+
+  if operating_system == u'Darwin':
+    for name, version in package_versions.iteritems():
+      package_filename = package_filenames[name]
+
+      command = u'sudo /usr/bin/hdiutil attach {0:s}'.format(
+          os.path.join(dependencies_directory, package_filename))
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+        continue
+
+      volume_path = u'/Volumes/{0:s}.pkg'.format(package_filename[:-4])
+      if not os.path.exists(volume_path):
+        logging.error(u'Missing volume: {0:s}.'.format(volume_path))
+        result = False
+        continue
+
+      pkg_file = u'{0:s}/{1:s}.pkg'.format(volume_path, package_filename[:-4])
+      if not os.path.exists(pkg_file):
+        logging.error(u'Missing pkg file: {0:s}.'.format(pkg_file))
+        result = False
+        continue
+
+      command = u'sudo /usr/sbin/installer -target / -pkg {0:s}'.format(
+          pkg_file)
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+
+      command = u'sudo /usr/bin/hdiutil detach {0:s}'.format(volume_path)
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+
+  elif operating_system == u'Linux':
+    if linux_name == u'Fedora':
+      # TODO: move these to a separate file?
+      dependencies = [
+          u'ipython',
+          u'libyaml',
+          u'python-dateutil',
+          u'pyparsing',
+          u'pytz',
+          u'PyYAML',
+          u'protobuf-python']
+
+      command = u'sudo yum install {0:s}'.format(u' '.join(dependencies))
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+
+      command = u'sudo rpm -Fvh {0:s}/*'.format(dependencies_directory)
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+
+    elif linux_name == u'Ubuntu':
+      # TODO: add -dbg package support.
+      # TODO: move these to a separate file?
+      dependencies = [
+          u'ipython',
+          u'libprotobuf7',
+          u'libyaml-0-2',
+          u'python-bencode',
+          u'python-dateutil',
+          u'python-dpkt',
+          u'python-hachoir-core',
+          u'python-hachoir-metadata',
+          u'python-hachoir-parser',
+          u'python-protobuf',
+          u'python-six',
+          u'python-tz',
+          u'python-yaml']
+
+      command = u'sudo apt-get install {0:s}'.format(u' '.join(dependencies))
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+
+      command = u'sudo dpkg -i {0:s}/*.deb'.format(dependencies_directory)
+      print 'Running: "{0:s}"'.format(command)
+      exit_code = subprocess.call(command, shell=True)
+      if exit_code != 0:
+        logging.error(u'Running: "{0:s}" failed.'.format(command))
+        result = False
+
+  elif operating_system == u'Windows':
+    for name, version in package_versions.iteritems():
+      # TODO: add RunAs ?
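+      # The command assembled below runs the Windows installer unattended;
+      # for example (hypothetical package name, illustration only):
+      #   msiexec.exe /i dependencies\pyparsing-2.0.1.win32.msi /q
+      # where /i installs the given MSI and /q suppresses the installer UI.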
+ package_filename = package_filenames[name] + command = u'msiexec.exe /i {0:s} /q'.format(os.path.join( + dependencies_directory, package_filename)) + print 'Installing: {0:s} {1:s}'.format(name, u'.'.join(version)) + exit_code = subprocess.call(command, shell=False) + if exit_code != 0: + logging.error(u'Running: "{0:s}" failed.'.format(command)) + result = False + + return result + + +if __name__ == '__main__': + if not Main(): + sys.exit(1) + else: + sys.exit(0) diff --git a/utils/upload.py b/utils/upload.py new file mode 100644 index 0000000..57487dc --- /dev/null +++ b/utils/upload.py @@ -0,0 +1,2645 @@ +#!/usr/bin/env python +# coding: utf-8 +# +# Copyright 2007 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tool for uploading diffs from a version control system to the codereview app. + +Usage summary: upload.py [options] [-- diff_options] [path...] + +Diff options are passed to the diff command of the underlying system. + +Supported version control systems: + Git + Mercurial + Subversion + Perforce + CVS + +It is important for Git/Mercurial users to specify a tree/node/branch to diff +against by using the '--rev' option. +""" +# This code is derived from appcfg.py in the App Engine SDK (open source), +# and from ASPN recipe #146306. + +import BaseHTTPServer +import ConfigParser +import cookielib +import errno +import fnmatch +import getpass +import logging +import marshal +import mimetypes +import optparse +import os +import re +import socket +import subprocess +import sys +import urllib +import urllib2 +import urlparse +import webbrowser + +# The md5 module was deprecated in Python 2.5. +try: + from hashlib import md5 +except ImportError: + from md5 import md5 + +try: + import readline +except ImportError: + pass + +try: + import keyring +except ImportError: + keyring = None + +# The logging verbosity: +# 0: Errors only. +# 1: Status messages. +# 2: Info logs. +# 3: Debug logs. +verbosity = 1 + +# The account type used for authentication. +# This line could be changed by the review server (see handler for +# upload.py). +AUTH_ACCOUNT_TYPE = "GOOGLE" + +# URL of the default review server. As for AUTH_ACCOUNT_TYPE, this line could be +# changed by the review server (see handler for upload.py). +DEFAULT_REVIEW_SERVER = "codereview.appspot.com" + +# Max size of patch or base file. +MAX_UPLOAD_SIZE = 900 * 1024 + +# Constants for version control names. Used by GuessVCSName. 
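+# These canonical names are also the values of VCS_ABBREVIATIONS below, so
+# short aliases resolve to the same constant, e.g. (illustration):
+# "hg" -> "Mercurial", "svn" -> "Subversion", "p4" -> "Perforce".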
+VCS_GIT = "Git" +VCS_MERCURIAL = "Mercurial" +VCS_SUBVERSION = "Subversion" +VCS_PERFORCE = "Perforce" +VCS_CVS = "CVS" +VCS_UNKNOWN = "Unknown" + +VCS_ABBREVIATIONS = { + VCS_MERCURIAL.lower(): VCS_MERCURIAL, + "hg": VCS_MERCURIAL, + VCS_SUBVERSION.lower(): VCS_SUBVERSION, + "svn": VCS_SUBVERSION, + VCS_PERFORCE.lower(): VCS_PERFORCE, + "p4": VCS_PERFORCE, + VCS_GIT.lower(): VCS_GIT, + VCS_CVS.lower(): VCS_CVS, +} + +# OAuth 2.0-Related Constants +LOCALHOST_IP = '127.0.0.1' +DEFAULT_OAUTH2_PORT = 8001 +ACCESS_TOKEN_PARAM = 'access_token' +ERROR_PARAM = 'error' +OAUTH_DEFAULT_ERROR_MESSAGE = 'OAuth 2.0 error occurred.' +OAUTH_PATH = '/get-access-token' +OAUTH_PATH_PORT_TEMPLATE = OAUTH_PATH + '?port=%(port)d' +AUTH_HANDLER_RESPONSE = """\ + + + Authentication Status + + + +
+  <body>
+    <p>The authentication flow has completed.</p>
+  </body>
+ + +""" +# Borrowed from google-api-python-client +OPEN_LOCAL_MESSAGE_TEMPLATE = """\ +Your browser has been opened to visit: + + %s + +If your browser is on a different machine then exit and re-run +upload.py with the command-line parameter + + --no_oauth2_webbrowser +""" +NO_OPEN_LOCAL_MESSAGE_TEMPLATE = """\ +Go to the following link in your browser: + + %s + +and copy the access token. +""" + +# The result of parsing Subversion's [auto-props] setting. +svn_auto_props_map = None + +def GetEmail(prompt): + """Prompts the user for their email address and returns it. + + The last used email address is saved to a file and offered up as a suggestion + to the user. If the user presses enter without typing in anything the last + used email address is used. If the user enters a new address, it is saved + for next time we prompt. + + """ + last_email_file_name = os.path.expanduser("~/.last_codereview_email_address") + last_email = "" + if os.path.exists(last_email_file_name): + try: + last_email_file = open(last_email_file_name, "r") + last_email = last_email_file.readline().strip("\n") + last_email_file.close() + prompt += " [%s]" % last_email + except IOError, e: + pass + email = raw_input(prompt + ": ").strip() + if email: + try: + last_email_file = open(last_email_file_name, "w") + last_email_file.write(email) + last_email_file.close() + except IOError, e: + pass + else: + email = last_email + return email + + +def StatusUpdate(msg): + """Print a status message to stdout. + + If 'verbosity' is greater than 0, print the message. + + Args: + msg: The string to print. + """ + if verbosity > 0: + print msg + + +def ErrorExit(msg): + """Print an error message to stderr and exit.""" + print >>sys.stderr, msg + sys.exit(1) + + +class ClientLoginError(urllib2.HTTPError): + """Raised to indicate there was an error authenticating with ClientLogin.""" + + def __init__(self, url, code, msg, headers, args): + urllib2.HTTPError.__init__(self, url, code, msg, headers, None) + self.args = args + self._reason = args["Error"] + self.info = args.get("Info", None) + + @property + def reason(self): + # reason is a property on python 2.7 but a member variable on <=2.6. + # self.args is modified so it cannot be used as-is so save the value in + # self._reason. + return self._reason + + +class AbstractRpcServer(object): + """Provides a common interface for a simple RPC server.""" + + def __init__(self, host, auth_function, host_override=None, + extra_headers=None, save_cookies=False, + account_type=AUTH_ACCOUNT_TYPE): + """Creates a new AbstractRpcServer. + + Args: + host: The host to send requests to. + auth_function: A function that takes no arguments and returns an + (email, password) tuple when called. Will be called if authentication + is required. + host_override: The host header to send to the server (defaults to host). + extra_headers: A dict of extra headers to append to every request. + save_cookies: If True, save the authentication cookies to local disk. + If False, use an in-memory cookiejar instead. Subclasses must + implement this functionality. Defaults to False. + account_type: Account type used for authentication. Defaults to + AUTH_ACCOUNT_TYPE. 
+ """ + self.host = host + if (not self.host.startswith("http://") and + not self.host.startswith("https://")): + self.host = "http://" + self.host + self.host_override = host_override + self.auth_function = auth_function + self.authenticated = False + self.extra_headers = extra_headers or {} + self.save_cookies = save_cookies + self.account_type = account_type + self.opener = self._GetOpener() + if self.host_override: + logging.info("Server: %s; Host: %s", self.host, self.host_override) + else: + logging.info("Server: %s", self.host) + + def _GetOpener(self): + """Returns an OpenerDirector for making HTTP requests. + + Returns: + A urllib2.OpenerDirector object. + """ + raise NotImplementedError() + + def _CreateRequest(self, url, data=None): + """Creates a new urllib request.""" + logging.debug("Creating request for: '%s' with payload:\n%s", url, data) + req = urllib2.Request(url, data=data, headers={"Accept": "text/plain"}) + if self.host_override: + req.add_header("Host", self.host_override) + for key, value in self.extra_headers.iteritems(): + req.add_header(key, value) + return req + + def _GetAuthToken(self, email, password): + """Uses ClientLogin to authenticate the user, returning an auth token. + + Args: + email: The user's email address + password: The user's password + + Raises: + ClientLoginError: If there was an error authenticating with ClientLogin. + HTTPError: If there was some other form of HTTP error. + + Returns: + The authentication token returned by ClientLogin. + """ + account_type = self.account_type + if self.host.endswith(".google.com"): + # Needed for use inside Google. + account_type = "HOSTED" + req = self._CreateRequest( + url="https://www.google.com/accounts/ClientLogin", + data=urllib.urlencode({ + "Email": email, + "Passwd": password, + "service": "ah", + "source": "rietveld-codereview-upload", + "accountType": account_type, + }), + ) + try: + response = self.opener.open(req) + response_body = response.read() + response_dict = dict(x.split("=") + for x in response_body.split("\n") if x) + return response_dict["Auth"] + except urllib2.HTTPError, e: + if e.code == 403: + body = e.read() + response_dict = dict(x.split("=", 1) for x in body.split("\n") if x) + raise ClientLoginError(req.get_full_url(), e.code, e.msg, + e.headers, response_dict) + else: + raise + + def _GetAuthCookie(self, auth_token): + """Fetches authentication cookies for an authentication token. + + Args: + auth_token: The authentication token returned by ClientLogin. + + Raises: + HTTPError: If there was an error fetching the authentication cookies. + """ + # This is a dummy value to allow us to identify when we're successful. + continue_location = "http://localhost/" + args = {"continue": continue_location, "auth": auth_token} + req = self._CreateRequest("%s/_ah/login?%s" % + (self.host, urllib.urlencode(args))) + try: + response = self.opener.open(req) + except urllib2.HTTPError, e: + response = e + if (response.code != 302 or + response.info()["location"] != continue_location): + raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg, + response.headers, response.fp) + self.authenticated = True + + def _Authenticate(self): + """Authenticates the user. + + The authentication process works as follows: + 1) We get a username and password from the user + 2) We use ClientLogin to obtain an AUTH token for the user + (see http://code.google.com/apis/accounts/AuthForInstalledApps.html). + 3) We pass the auth token to /_ah/login on the server to obtain an + authentication cookie. 
If login was successful, it tries to redirect + us to the URL we provided. + + If we attempt to access the upload API without first obtaining an + authentication cookie, it returns a 401 response (or a 302) and + directs us to authenticate ourselves with ClientLogin. + """ + for i in range(3): + credentials = self.auth_function() + try: + auth_token = self._GetAuthToken(credentials[0], credentials[1]) + except ClientLoginError, e: + print >>sys.stderr, '' + if e.reason == "BadAuthentication": + if e.info == "InvalidSecondFactor": + print >>sys.stderr, ( + "Use an application-specific password instead " + "of your regular account password.\n" + "See http://www.google.com/" + "support/accounts/bin/answer.py?answer=185833") + else: + print >>sys.stderr, "Invalid username or password." + elif e.reason == "CaptchaRequired": + print >>sys.stderr, ( + "Please go to\n" + "https://www.google.com/accounts/DisplayUnlockCaptcha\n" + "and verify you are a human. Then try again.\n" + "If you are using a Google Apps account the URL is:\n" + "https://www.google.com/a/yourdomain.com/UnlockCaptcha") + elif e.reason == "NotVerified": + print >>sys.stderr, "Account not verified." + elif e.reason == "TermsNotAgreed": + print >>sys.stderr, "User has not agreed to TOS." + elif e.reason == "AccountDeleted": + print >>sys.stderr, "The user account has been deleted." + elif e.reason == "AccountDisabled": + print >>sys.stderr, "The user account has been disabled." + break + elif e.reason == "ServiceDisabled": + print >>sys.stderr, ("The user's access to the service has been " + "disabled.") + elif e.reason == "ServiceUnavailable": + print >>sys.stderr, "The service is not available; try again later." + else: + # Unknown error. + raise + print >>sys.stderr, '' + continue + self._GetAuthCookie(auth_token) + return + + def Send(self, request_path, payload=None, + content_type="application/octet-stream", + timeout=None, + extra_headers=None, + **kwargs): + """Sends an RPC and returns the response. + + Args: + request_path: The path to send the request to, eg /api/appversion/create. + payload: The body of the request, or None to send an empty request. + content_type: The Content-Type header to use. + timeout: timeout in seconds; default None i.e. no timeout. + (Note: for large requests on OS X, the timeout doesn't work right.) + extra_headers: Dict containing additional HTTP headers that should be + included in the request (string header names mapped to their values), + or None to not include any additional headers. + kwargs: Any keyword arguments are converted into query string parameters. + + Returns: + The response body, as a string. + """ + # TODO: Don't require authentication. Let the server say + # whether it is necessary. + if not self.authenticated: + self._Authenticate() + + old_timeout = socket.getdefaulttimeout() + socket.setdefaulttimeout(timeout) + try: + tries = 0 + while True: + tries += 1 + args = dict(kwargs) + url = "%s%s" % (self.host, request_path) + if args: + url += "?" + urllib.urlencode(args) + req = self._CreateRequest(url=url, data=payload) + req.add_header("Content-Type", content_type) + if extra_headers: + for header, value in extra_headers.items(): + req.add_header(header, value) + try: + f = self.opener.open(req) + response = f.read() + f.close() + return response + except urllib2.HTTPError, e: + if tries > 3: + raise + elif e.code == 401 or e.code == 302: + self._Authenticate() + elif e.code == 301: + # Handle permanent redirect manually. 
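+            # Explanatory note: a permanent redirect carries its target in
+            # the Location header; the lines below rewrite self.host from
+            # it, e.g. (illustrative URL only)
+            #   'https://codereview.appspot.com/some/path'
+            # yields self.host == 'https://codereview.appspot.com', and the
+            # surrounding while loop then retries against the new host.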
+ url = e.info()["location"] + url_loc = urlparse.urlparse(url) + self.host = '%s://%s' % (url_loc[0], url_loc[1]) + elif e.code >= 500: + ErrorExit(e.read()) + else: + raise + finally: + socket.setdefaulttimeout(old_timeout) + + +class HttpRpcServer(AbstractRpcServer): + """Provides a simplified RPC-style interface for HTTP requests.""" + + def _Authenticate(self): + """Save the cookie jar after authentication.""" + if isinstance(self.auth_function, OAuth2Creds): + access_token = self.auth_function() + if access_token is not None: + self.extra_headers['Authorization'] = 'OAuth %s' % (access_token,) + self.authenticated = True + else: + super(HttpRpcServer, self)._Authenticate() + if self.save_cookies: + StatusUpdate("Saving authentication cookies to %s" % self.cookie_file) + self.cookie_jar.save() + + def _GetOpener(self): + """Returns an OpenerDirector that supports cookies and ignores redirects. + + Returns: + A urllib2.OpenerDirector object. + """ + opener = urllib2.OpenerDirector() + opener.add_handler(urllib2.ProxyHandler()) + opener.add_handler(urllib2.UnknownHandler()) + opener.add_handler(urllib2.HTTPHandler()) + opener.add_handler(urllib2.HTTPDefaultErrorHandler()) + opener.add_handler(urllib2.HTTPSHandler()) + opener.add_handler(urllib2.HTTPErrorProcessor()) + if self.save_cookies: + self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies") + self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file) + if os.path.exists(self.cookie_file): + try: + self.cookie_jar.load() + self.authenticated = True + StatusUpdate("Loaded authentication cookies from %s" % + self.cookie_file) + except (cookielib.LoadError, IOError): + # Failed to load cookies - just ignore them. + pass + else: + # Create an empty cookie file with mode 600 + fd = os.open(self.cookie_file, os.O_CREAT, 0600) + os.close(fd) + # Always chmod the cookie file + os.chmod(self.cookie_file, 0600) + else: + # Don't save cookies across runs of update.py. + self.cookie_jar = cookielib.CookieJar() + opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar)) + return opener + + +class CondensedHelpFormatter(optparse.IndentedHelpFormatter): + """Frees more horizontal space by removing indentation from group + options and collapsing arguments between short and long, e.g. 
+ '-o ARG, --opt=ARG' to -o --opt ARG""" + + def format_heading(self, heading): + return "%s:\n" % heading + + def format_option(self, option): + self.dedent() + res = optparse.HelpFormatter.format_option(self, option) + self.indent() + return res + + def format_option_strings(self, option): + self.set_long_opt_delimiter(" ") + optstr = optparse.HelpFormatter.format_option_strings(self, option) + optlist = optstr.split(", ") + if len(optlist) > 1: + if option.takes_value(): + # strip METAVAR from all but the last option + optlist = [x.split()[0] for x in optlist[:-1]] + optlist[-1:] + optstr = " ".join(optlist) + return optstr + + +parser = optparse.OptionParser( + usage=("%prog [options] [-- diff_options] [path...]\n" + "See also: http://code.google.com/p/rietveld/wiki/UploadPyUsage"), + add_help_option=False, + formatter=CondensedHelpFormatter() +) +parser.add_option("-h", "--help", action="store_true", + help="Show this help message and exit.") +parser.add_option("-y", "--assume_yes", action="store_true", + dest="assume_yes", default=False, + help="Assume that the answer to yes/no questions is 'yes'.") +# Logging +group = parser.add_option_group("Logging options") +group.add_option("-q", "--quiet", action="store_const", const=0, + dest="verbose", help="Print errors only.") +group.add_option("-v", "--verbose", action="store_const", const=2, + dest="verbose", default=1, + help="Print info level logs.") +group.add_option("--noisy", action="store_const", const=3, + dest="verbose", help="Print all logs.") +group.add_option("--print_diffs", dest="print_diffs", action="store_true", + help="Print full diffs.") +# Review server +group = parser.add_option_group("Review server options") +group.add_option("-s", "--server", action="store", dest="server", + default=DEFAULT_REVIEW_SERVER, + metavar="SERVER", + help=("The server to upload to. The format is host[:port]. " + "Defaults to '%default'.")) +group.add_option("-e", "--email", action="store", dest="email", + metavar="EMAIL", default=None, + help="The username to use. Will prompt if omitted.") +group.add_option("-H", "--host", action="store", dest="host", + metavar="HOST", default=None, + help="Overrides the Host header sent with all RPCs.") +group.add_option("--no_cookies", action="store_false", + dest="save_cookies", default=True, + help="Do not save authentication cookies to local disk.") +group.add_option("--oauth2", action="store_true", + dest="use_oauth2", default=False, + help="Use OAuth 2.0 instead of a password.") +group.add_option("--oauth2_port", action="store", type="int", + dest="oauth2_port", default=DEFAULT_OAUTH2_PORT, + help=("Port to use to handle OAuth 2.0 redirect. 
Must be an " + "integer in the range 1024-49151, defaults to " + "'%default'.")) +group.add_option("--no_oauth2_webbrowser", action="store_false", + dest="open_oauth2_local_webbrowser", default=True, + help="Don't open a browser window to get an access token.") +group.add_option("--account_type", action="store", dest="account_type", + metavar="TYPE", default=AUTH_ACCOUNT_TYPE, + choices=["GOOGLE", "HOSTED"], + help=("Override the default account type " + "(defaults to '%default', " + "valid choices are 'GOOGLE' and 'HOSTED').")) +# Issue +group = parser.add_option_group("Issue options") +group.add_option("-t", "--title", action="store", dest="title", + help="New issue subject or new patch set title") +group.add_option("-m", "--message", action="store", dest="message", + default=None, + help="New issue description or new patch set message") +group.add_option("-F", "--file", action="store", dest="file", + default=None, help="Read the message above from file.") +group.add_option("-r", "--reviewers", action="store", dest="reviewers", + metavar="REVIEWERS", default=None, + help="Add reviewers (comma separated email addresses).") +group.add_option("--cc", action="store", dest="cc", + metavar="CC", default='log2timeline-dev@googlegroups.com', + help="Add CC (comma separated email addresses).") +group.add_option("--private", action="store_true", dest="private", + default=False, + help="Make the issue restricted to reviewers and those CCed") +# Upload options +group = parser.add_option_group("Patch options") +group.add_option("-i", "--issue", type="int", action="store", + metavar="ISSUE", default=None, + help="Issue number to which to add. Defaults to new issue.") +group.add_option("--cache", action="store_true", dest="add_cache", + default=False, help="Add git cache parameter for new files.") +group.add_option("--base_url", action="store", dest="base_url", default=None, + help="Base URL path for files (listed as \"Base URL\" when " + "viewing issue). If omitted, will be guessed automatically " + "for SVN repos and left blank for others.") +group.add_option("--download_base", action="store_true", + dest="download_base", default=False, + help="Base files will be downloaded by the server " + "(side-by-side diffs may not work on files with CRs).") +group.add_option("--rev", action="store", dest="revision", + metavar="REV", default=None, + help="Base revision/branch/tree to diff against. Use " + "rev1:rev2 range to review already committed changeset.") +group.add_option("--send_mail", action="store_true", + dest="send_mail", default=False, + help="Send notification email to reviewers.") +group.add_option("-p", "--send_patch", action="store_true", + dest="send_patch", default=False, + help="Same as --send_mail, but include diff as an " + "attachment, and prepend email subject with 'PATCH:'.") +group.add_option("--vcs", action="store", dest="vcs", + metavar="VCS", default=None, + help=("Version control system (optional, usually upload.py " + "already guesses the right VCS).")) +group.add_option("--emulate_svn_auto_props", action="store_true", + dest="emulate_svn_auto_props", default=False, + help=("Emulate Subversion's auto properties feature.")) +# Git-specific +group = parser.add_option_group("Git-specific options") +group.add_option("--git_similarity", action="store", dest="git_similarity", + metavar="SIM", type="int", default=50, + help=("Set the minimum similarity index for detecting renames " + "and copies. See `git diff -C`. 
(default 50).")) +group.add_option("--git_no_find_copies", action="store_false", default=True, + dest="git_find_copies", + help=("Prevents git from looking for copies (default off).")) +# Perforce-specific +group = parser.add_option_group("Perforce-specific options " + "(overrides P4 environment variables)") +group.add_option("--p4_port", action="store", dest="p4_port", + metavar="P4_PORT", default=None, + help=("Perforce server and port (optional)")) +group.add_option("--p4_changelist", action="store", dest="p4_changelist", + metavar="P4_CHANGELIST", default=None, + help=("Perforce changelist id")) +group.add_option("--p4_client", action="store", dest="p4_client", + metavar="P4_CLIENT", default=None, + help=("Perforce client/workspace")) +group.add_option("--p4_user", action="store", dest="p4_user", + metavar="P4_USER", default=None, + help=("Perforce user")) + + +# OAuth 2.0 Methods and Helpers +class ClientRedirectServer(BaseHTTPServer.HTTPServer): + """A server for redirects back to localhost from the associated server. + + Waits for a single request and parses the query parameters for an access token + or an error and then stops serving. + """ + access_token = None + error = None + + +class ClientRedirectHandler(BaseHTTPServer.BaseHTTPRequestHandler): + """A handler for redirects back to localhost from the associated server. + + Waits for a single request and parses the query parameters into the server's + access_token or error and then stops serving. + """ + + def SetResponseValue(self): + """Stores the access token or error from the request on the server. + + Will only do this if exactly one query parameter was passed in to the + request and that query parameter used 'access_token' or 'error' as the key. + """ + query_string = urlparse.urlparse(self.path).query + query_params = urlparse.parse_qs(query_string) + + if len(query_params) == 1: + if query_params.has_key(ACCESS_TOKEN_PARAM): + access_token_list = query_params[ACCESS_TOKEN_PARAM] + if len(access_token_list) == 1: + self.server.access_token = access_token_list[0] + else: + error_list = query_params.get(ERROR_PARAM, []) + if len(error_list) == 1: + self.server.error = error_list[0] + + def do_GET(self): + """Handle a GET request. + + Parses and saves the query parameters and prints a message that the server + has completed its lone task (handling a redirect). + + Note that we can't detect if an error occurred. + """ + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.SetResponseValue() + self.wfile.write(AUTH_HANDLER_RESPONSE) + + def log_message(self, format, *args): + """Do not log messages to stdout while running as command line program.""" + pass + + +def OpenOAuth2ConsentPage(server=DEFAULT_REVIEW_SERVER, + port=DEFAULT_OAUTH2_PORT): + """Opens the OAuth 2.0 consent page or prints instructions how to. + + Uses the webbrowser module to open the OAuth server side page in a browser. + + Args: + server: String containing the review server URL. Defaults to + DEFAULT_REVIEW_SERVER. + port: Integer, the port where the localhost server receiving the redirect + is serving. Defaults to DEFAULT_OAUTH2_PORT. + + Returns: + A boolean indicating whether the page opened successfully. 
+ """ + path = OAUTH_PATH_PORT_TEMPLATE % {'port': port} + parsed_url = urlparse.urlparse(server) + scheme = parsed_url[0] or 'https' + if scheme != 'https': + ErrorExit('Using OAuth requires a review server with SSL enabled.') + # If no scheme was given on command line the server address ends up in + # parsed_url.path otherwise in netloc. + host = parsed_url[1] or parsed_url[2] + page = '%s://%s%s' % (scheme, host, path) + page_opened = webbrowser.open(page, new=1, autoraise=True) + if page_opened: + print OPEN_LOCAL_MESSAGE_TEMPLATE % (page,) + return page_opened + + +def WaitForAccessToken(port=DEFAULT_OAUTH2_PORT): + """Spins up a simple HTTP Server to handle a single request. + + Intended to handle a single redirect from the production server after the + user authenticated via OAuth 2.0 with the server. + + Args: + port: Integer, the port where the localhost server receiving the redirect + is serving. Defaults to DEFAULT_OAUTH2_PORT. + + Returns: + The access token passed to the localhost server, or None if no access token + was passed. + """ + httpd = ClientRedirectServer((LOCALHOST_IP, port), ClientRedirectHandler) + # Wait to serve just one request before deferring control back + # to the caller of wait_for_refresh_token + httpd.handle_request() + if httpd.access_token is None: + ErrorExit(httpd.error or OAUTH_DEFAULT_ERROR_MESSAGE) + return httpd.access_token + + +def GetAccessToken(server=DEFAULT_REVIEW_SERVER, port=DEFAULT_OAUTH2_PORT, + open_local_webbrowser=True): + """Gets an Access Token for the current user. + + Args: + server: String containing the review server URL. Defaults to + DEFAULT_REVIEW_SERVER. + port: Integer, the port where the localhost server receiving the redirect + is serving. Defaults to DEFAULT_OAUTH2_PORT. + open_local_webbrowser: Boolean, defaults to True. If set, opens a page in + the user's browser. + + Returns: + A string access token that was sent to the local server. If the serving page + via WaitForAccessToken does not receive an access token, this method + returns None. + """ + access_token = None + if open_local_webbrowser: + page_opened = OpenOAuth2ConsentPage(server=server, port=port) + if page_opened: + try: + access_token = WaitForAccessToken(port=port) + except socket.error, e: + print 'Can\'t start local webserver. Socket Error: %s\n' % (e.strerror,) + + if access_token is None: + # TODO(dhermes): Offer to add to clipboard using xsel, xclip, pbcopy, etc. + page = 'https://%s%s' % (server, OAUTH_PATH) + print NO_OPEN_LOCAL_MESSAGE_TEMPLATE % (page,) + access_token = raw_input('Enter access token: ').strip() + + return access_token + + +class KeyringCreds(object): + def __init__(self, server, host, email): + self.server = server + # Explicitly cast host to str to work around bug in old versions of Keyring + # (versions before 0.10). Even though newer versions of Keyring fix this, + # some modern linuxes (such as Ubuntu 12.04) still bundle a version with + # the bug. + self.host = str(host) + self.email = email + self.accounts_seen = set() + + def GetUserCredentials(self): + """Prompts the user for a username and password. + + Only use keyring on the initial call. If the keyring contains the wrong + password, we want to give the user a chance to enter another one. + """ + # Create a local alias to the email variable to avoid Python's crazy + # scoping rules. 
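+    # Explanatory note: keyring is declared global so it can be disabled
+    # (set to None) below after a failed lookup. Only two calls from the
+    # keyring API are used here:
+    #   keyring.get_password(self.host, email)
+    #   keyring.set_password(self.host, email, password)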
+ global keyring + email = self.email + if email is None: + email = GetEmail("Email (login for uploading to %s)" % self.server) + password = None + if keyring and not email in self.accounts_seen: + try: + password = keyring.get_password(self.host, email) + except: + # Sadly, we have to trap all errors here as + # gnomekeyring.IOError inherits from object. :/ + print "Failed to get password from keyring" + keyring = None + if password is not None: + print "Using password from system keyring." + self.accounts_seen.add(email) + else: + password = getpass.getpass("Password for %s: " % email) + if keyring: + answer = raw_input("Store password in system keyring?(y/N) ").strip() + if answer == "y": + keyring.set_password(self.host, email, password) + self.accounts_seen.add(email) + return (email, password) + + +class OAuth2Creds(object): + """Simple object to hold server and port to be passed to GetAccessToken.""" + + def __init__(self, server, port, open_local_webbrowser=True): + self.server = server + self.port = port + self.open_local_webbrowser = open_local_webbrowser + + def __call__(self): + """Uses stored server and port to retrieve OAuth 2.0 access token.""" + return GetAccessToken(server=self.server, port=self.port, + open_local_webbrowser=self.open_local_webbrowser) + + +def GetRpcServer(server, email=None, host_override=None, save_cookies=True, + account_type=AUTH_ACCOUNT_TYPE, use_oauth2=False, + oauth2_port=DEFAULT_OAUTH2_PORT, + open_oauth2_local_webbrowser=True): + """Returns an instance of an AbstractRpcServer. + + Args: + server: String containing the review server URL. + email: String containing user's email address. + host_override: If not None, string containing an alternate hostname to use + in the host header. + save_cookies: Whether authentication cookies should be saved to disk. + account_type: Account type for authentication, either 'GOOGLE' + or 'HOSTED'. Defaults to AUTH_ACCOUNT_TYPE. + use_oauth2: Boolean indicating whether OAuth 2.0 should be used for + authentication. + oauth2_port: Integer, the port where the localhost server receiving the + redirect is serving. Defaults to DEFAULT_OAUTH2_PORT. + open_oauth2_local_webbrowser: Boolean, defaults to True. If True and using + OAuth, this opens a page in the user's browser to obtain a token. + + Returns: + A new HttpRpcServer, on which RPC calls can be made. + """ + # If this is the dev_appserver, use fake authentication. + host = (host_override or server).lower() + if re.match(r'(http://)?localhost([:/]|$)', host): + if email is None: + email = "test@example.com" + logging.info("Using debug user %s. Override with --email" % email) + server = HttpRpcServer( + server, + lambda: (email, "password"), + host_override=host_override, + extra_headers={"Cookie": + 'dev_appserver_login="%s:False"' % email}, + save_cookies=save_cookies, + account_type=account_type) + # Don't try to talk to ClientLogin. + server.authenticated = True + return server + + positional_args = [server] + if use_oauth2: + positional_args.append( + OAuth2Creds(server, oauth2_port, open_oauth2_local_webbrowser)) + else: + positional_args.append(KeyringCreds(server, host, email).GetUserCredentials) + return HttpRpcServer(*positional_args, + host_override=host_override, + save_cookies=save_cookies, + account_type=account_type) + + +def EncodeMultipartFormData(fields, files): + """Encode form fields for multipart/form-data. + + Args: + fields: A sequence of (name, value) elements for regular form fields. 
+ files: A sequence of (name, filename, value) elements for data to be + uploaded as files. + Returns: + (content_type, body) ready for httplib.HTTP instance. + + Source: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 + """ + BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-' + CRLF = '\r\n' + lines = [] + for (key, value) in fields: + lines.append('--' + BOUNDARY) + lines.append('Content-Disposition: form-data; name="%s"' % key) + lines.append('') + if isinstance(value, unicode): + value = value.encode('utf-8') + lines.append(value) + for (key, filename, value) in files: + lines.append('--' + BOUNDARY) + lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' % + (key, filename)) + lines.append('Content-Type: %s' % GetContentType(filename)) + lines.append('') + if isinstance(value, unicode): + value = value.encode('utf-8') + lines.append(value) + lines.append('--' + BOUNDARY + '--') + lines.append('') + body = CRLF.join(lines) + content_type = 'multipart/form-data; boundary=%s' % BOUNDARY + return content_type, body + + +def GetContentType(filename): + """Helper to guess the content-type from the filename.""" + return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + + +# Use a shell for subcommands on Windows to get a PATH search. +use_shell = sys.platform.startswith("win") + +def RunShellWithReturnCodeAndStderr(command, print_output=False, + universal_newlines=True, + env=os.environ): + """Executes a command and returns the output from stdout, stderr and the return code. + + Args: + command: Command to execute. + print_output: If True, the output is printed to stdout. + If False, both stdout and stderr are ignored. + universal_newlines: Use universal_newlines flag (default: True). + + Returns: + Tuple (stdout, stderr, return code) + """ + logging.info("Running %s", command) + env = env.copy() + env['LC_MESSAGES'] = 'C' + p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + shell=use_shell, universal_newlines=universal_newlines, + env=env) + if print_output: + output_array = [] + while True: + line = p.stdout.readline() + if not line: + break + print line.strip("\n") + output_array.append(line) + output = "".join(output_array) + else: + output = p.stdout.read() + p.wait() + errout = p.stderr.read() + if print_output and errout: + print >>sys.stderr, errout + p.stdout.close() + p.stderr.close() + return output, errout, p.returncode + +def RunShellWithReturnCode(command, print_output=False, + universal_newlines=True, + env=os.environ): + """Executes a command and returns the output from stdout and the return code.""" + out, err, retcode = RunShellWithReturnCodeAndStderr(command, print_output, + universal_newlines, env) + return out, retcode + +def RunShell(command, silent_ok=False, universal_newlines=True, + print_output=False, env=os.environ): + data, retcode = RunShellWithReturnCode(command, print_output, + universal_newlines, env) + if retcode: + ErrorExit("Got error status from %s:\n%s" % (command, data)) + if not silent_ok and not data: + ErrorExit("No output from %s" % command) + return data + + +class VersionControlSystem(object): + """Abstract base class providing an interface to the VCS.""" + + def __init__(self, options): + """Constructor. + + Args: + options: Command line options. 
+ """ + self.options = options + + def GetGUID(self): + """Return string to distinguish the repository from others, for example to + query all opened review issues for it""" + raise NotImplementedError( + "abstract method -- subclass %s must override" % self.__class__) + + def PostProcessDiff(self, diff): + """Return the diff with any special post processing this VCS needs, e.g. + to include an svn-style "Index:".""" + return diff + + def GenerateDiff(self, args): + """Return the current diff as a string. + + Args: + args: Extra arguments to pass to the diff command. + """ + raise NotImplementedError( + "abstract method -- subclass %s must override" % self.__class__) + + def GetUnknownFiles(self): + """Return a list of files unknown to the VCS.""" + raise NotImplementedError( + "abstract method -- subclass %s must override" % self.__class__) + + def CheckForUnknownFiles(self): + """Show an "are you sure?" prompt if there are unknown files.""" + unknown_files = self.GetUnknownFiles() + if unknown_files: + print "The following files are not added to version control:" + for line in unknown_files: + print line + prompt = "Are you sure to continue?(y/N) " + answer = raw_input(prompt).strip() + if answer != "y": + ErrorExit("User aborted") + + def GetBaseFile(self, filename): + """Get the content of the upstream version of a file. + + Returns: + A tuple (base_content, new_content, is_binary, status) + base_content: The contents of the base file. + new_content: For text files, this is empty. For binary files, this is + the contents of the new file, since the diff output won't contain + information to reconstruct the current file. + is_binary: True iff the file is binary. + status: The status of the file. + """ + + raise NotImplementedError( + "abstract method -- subclass %s must override" % self.__class__) + + + def GetBaseFiles(self, diff): + """Helper that calls GetBase file for each file in the patch. + + Returns: + A dictionary that maps from filename to GetBaseFile's tuple. Filenames + are retrieved based on lines that start with "Index:" or + "Property changes on:". + """ + files = {} + for line in diff.splitlines(True): + if line.startswith('Index:') or line.startswith('Property changes on:'): + unused, filename = line.split(':', 1) + # On Windows if a file has property changes its filename uses '\' + # instead of '/'. + filename = filename.strip().replace('\\', '/') + files[filename] = self.GetBaseFile(filename) + return files + + + def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options, + files): + """Uploads the base files (and if necessary, the current ones as well).""" + + def UploadFile(filename, file_id, content, is_binary, status, is_base): + """Uploads a file to the server.""" + file_too_large = False + if is_base: + type = "base" + else: + type = "current" + if len(content) > MAX_UPLOAD_SIZE: + print ("Not uploading the %s file for %s because it's too large." 
% + (type, filename)) + file_too_large = True + content = "" + checksum = md5(content).hexdigest() + if options.verbose > 0 and not file_too_large: + print "Uploading %s file for %s" % (type, filename) + url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id) + form_fields = [("filename", filename), + ("status", status), + ("checksum", checksum), + ("is_binary", str(is_binary)), + ("is_current", str(not is_base)), + ] + if file_too_large: + form_fields.append(("file_too_large", "1")) + if options.email: + form_fields.append(("user", options.email)) + ctype, body = EncodeMultipartFormData(form_fields, + [("data", filename, content)]) + response_body = rpc_server.Send(url, body, + content_type=ctype) + if not response_body.startswith("OK"): + StatusUpdate(" --> %s" % response_body) + sys.exit(1) + + patches = dict() + [patches.setdefault(v, k) for k, v in patch_list] + for filename in patches.keys(): + base_content, new_content, is_binary, status = files[filename] + file_id_str = patches.get(filename) + if file_id_str.find("nobase") != -1: + base_content = None + file_id_str = file_id_str[file_id_str.rfind("_") + 1:] + file_id = int(file_id_str) + if base_content != None: + UploadFile(filename, file_id, base_content, is_binary, status, True) + if new_content != None: + UploadFile(filename, file_id, new_content, is_binary, status, False) + + def IsImage(self, filename): + """Returns true if the filename has an image extension.""" + mimetype = mimetypes.guess_type(filename)[0] + if not mimetype: + return False + return mimetype.startswith("image/") and not mimetype.startswith("image/svg") + + def IsBinaryData(self, data): + """Returns true if data contains a null byte.""" + # Derived from how Mercurial's heuristic, see + # http://selenic.com/hg/file/848a6658069e/mercurial/util.py#l229 + return bool(data and "\0" in data) + + +class SubversionVCS(VersionControlSystem): + """Implementation of the VersionControlSystem interface for Subversion.""" + + def __init__(self, options): + super(SubversionVCS, self).__init__(options) + if self.options.revision: + match = re.match(r"(\d+)(:(\d+))?", self.options.revision) + if not match: + ErrorExit("Invalid Subversion revision %s." % self.options.revision) + self.rev_start = match.group(1) + self.rev_end = match.group(3) + else: + self.rev_start = self.rev_end = None + # Cache output from "svn list -r REVNO dirname". + # Keys: dirname, Values: 2-tuple (ouput for start rev and end rev). + self.svnls_cache = {} + # Base URL is required to fetch files deleted in an older revision. + # Result is cached to not guess it over and over again in GetBaseFile(). + required = self.options.download_base or self.options.revision is not None + self.svn_base = self._GuessBase(required) + + def GetGUID(self): + return self._GetInfo("Repository UUID") + + def GuessBase(self, required): + """Wrapper for _GuessBase.""" + return self.svn_base + + def _GuessBase(self, required): + """Returns base URL for current diff. + + Args: + required: If true, exits if the url can't be guessed, otherwise None is + returned. 
+ """ + url = self._GetInfo("URL") + if url: + scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) + guess = "" + # TODO(anatoli) - repository specific hacks should be handled by server + if netloc == "svn.python.org" and scheme == "svn+ssh": + path = "projects" + path + scheme = "http" + guess = "Python " + elif netloc.endswith(".googlecode.com"): + scheme = "http" + guess = "Google Code " + path = path + "/" + base = urlparse.urlunparse((scheme, netloc, path, params, + query, fragment)) + logging.info("Guessed %sbase = %s", guess, base) + return base + if required: + ErrorExit("Can't find URL in output from svn info") + return None + + def _GetInfo(self, key): + """Parses 'svn info' for current dir. Returns value for key or None""" + for line in RunShell(["svn", "info"]).splitlines(): + if line.startswith(key + ": "): + return line.split(":", 1)[1].strip() + + def _EscapeFilename(self, filename): + """Escapes filename for SVN commands.""" + if "@" in filename and not filename.endswith("@"): + filename = "%s@" % filename + return filename + + def GenerateDiff(self, args): + cmd = ["svn", "diff"] + if self.options.revision: + cmd += ["-r", self.options.revision] + cmd.extend(args) + data = RunShell(cmd) + count = 0 + for line in data.splitlines(): + if line.startswith("Index:") or line.startswith("Property changes on:"): + count += 1 + logging.info(line) + if not count: + ErrorExit("No valid patches found in output from svn diff") + return data + + def _CollapseKeywords(self, content, keyword_str): + """Collapses SVN keywords.""" + # svn cat translates keywords but svn diff doesn't. As a result of this + # behavior patching.PatchChunks() fails with a chunk mismatch error. + # This part was originally written by the Review Board development team + # who had the same problem (http://reviews.review-board.org/r/276/). + # Mapping of keywords to known aliases + svn_keywords = { + # Standard keywords + 'Date': ['Date', 'LastChangedDate'], + 'Revision': ['Revision', 'LastChangedRevision', 'Rev'], + 'Author': ['Author', 'LastChangedBy'], + 'HeadURL': ['HeadURL', 'URL'], + 'Id': ['Id'], + + # Aliases + 'LastChangedDate': ['LastChangedDate', 'Date'], + 'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'], + 'LastChangedBy': ['LastChangedBy', 'Author'], + 'URL': ['URL', 'HeadURL'], + } + + def repl(m): + if m.group(2): + return "$%s::%s$" % (m.group(1), " " * len(m.group(3))) + return "$%s$" % m.group(1) + keywords = [keyword + for name in keyword_str.split(" ") + for keyword in svn_keywords.get(name, [])] + return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content) + + def GetUnknownFiles(self): + status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True) + unknown_files = [] + for line in status.split("\n"): + if line and line[0] == "?": + unknown_files.append(line) + return unknown_files + + def ReadFile(self, filename): + """Returns the contents of a file.""" + file = open(filename, 'rb') + result = "" + try: + result = file.read() + finally: + file.close() + return result + + def GetStatus(self, filename): + """Returns the status of a file.""" + if not self.options.revision: + status = RunShell(["svn", "status", "--ignore-externals", + self._EscapeFilename(filename)]) + if not status: + ErrorExit("svn status returned no output for %s" % filename) + status_lines = status.splitlines() + # If file is in a cl, the output will begin with + # "\n--- Changelist 'cl_name':\n". 
See + # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt + if (len(status_lines) == 3 and + not status_lines[0] and + status_lines[1].startswith("--- Changelist")): + status = status_lines[2] + else: + status = status_lines[0] + # If we have a revision to diff against we need to run "svn list" + # for the old and the new revision and compare the results to get + # the correct status for a file. + else: + dirname, relfilename = os.path.split(filename) + if dirname not in self.svnls_cache: + cmd = ["svn", "list", "-r", self.rev_start, + self._EscapeFilename(dirname) or "."] + out, err, returncode = RunShellWithReturnCodeAndStderr(cmd) + if returncode: + # Directory might not yet exist at start revison + # svn: Unable to find repository location for 'abc' in revision nnn + if re.match('^svn: Unable to find repository location for .+ in revision \d+', err): + old_files = () + else: + ErrorExit("Failed to get status for %s:\n%s" % (filename, err)) + else: + old_files = out.splitlines() + args = ["svn", "list"] + if self.rev_end: + args += ["-r", self.rev_end] + cmd = args + [self._EscapeFilename(dirname) or "."] + out, returncode = RunShellWithReturnCode(cmd) + if returncode: + ErrorExit("Failed to run command %s" % cmd) + self.svnls_cache[dirname] = (old_files, out.splitlines()) + old_files, new_files = self.svnls_cache[dirname] + if relfilename in old_files and relfilename not in new_files: + status = "D " + elif relfilename in old_files and relfilename in new_files: + status = "M " + else: + status = "A " + return status + + def GetBaseFile(self, filename): + status = self.GetStatus(filename) + base_content = None + new_content = None + + # If a file is copied its status will be "A +", which signifies + # "addition-with-history". See "svn st" for more information. We need to + # upload the original file or else diff parsing will fail if the file was + # edited. + if status[0] == "A" and status[3] != "+": + # We'll need to upload the new content if we're adding a binary file + # since diff's output won't contain it. + mimetype = RunShell(["svn", "propget", "svn:mime-type", + self._EscapeFilename(filename)], silent_ok=True) + base_content = "" + is_binary = bool(mimetype) and not mimetype.startswith("text/") + if is_binary: + new_content = self.ReadFile(filename) + elif (status[0] in ("M", "D", "R") or + (status[0] == "A" and status[3] == "+") or # Copied file. + (status[0] == " " and status[1] == "M")): # Property change. + args = [] + if self.options.revision: + # filename must not be escaped. We already add an ampersand here. + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) + else: + # Don't change filename, it's needed later. + url = filename + args += ["-r", "BASE"] + cmd = ["svn"] + args + ["propget", "svn:mime-type", url] + mimetype, returncode = RunShellWithReturnCode(cmd) + if returncode: + # File does not exist in the requested revision. + # Reset mimetype, it contains an error message. + mimetype = "" + else: + mimetype = mimetype.strip() + get_base = False + # this test for binary is exactly the test prescribed by the + # official SVN docs at + # http://subversion.apache.org/faq.html#binary-files + is_binary = (bool(mimetype) and + not mimetype.startswith("text/") and + mimetype not in ("image/x-xbitmap", "image/x-xpixmap")) + if status[0] == " ": + # Empty base content just to force an upload. 
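+        # Explanatory note: a property-only change (status " M") leaves the
+        # file content untouched, so an empty base is enough to make the
+        # server register the file.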
+ base_content = "" + elif is_binary: + get_base = True + if status[0] == "M": + if not self.rev_end: + new_content = self.ReadFile(filename) + else: + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end) + new_content = RunShell(["svn", "cat", url], + universal_newlines=True, silent_ok=True) + else: + get_base = True + + if get_base: + if is_binary: + universal_newlines = False + else: + universal_newlines = True + if self.rev_start: + # "svn cat -r REV delete_file.txt" doesn't work. cat requires + # the full URL with "@REV" appended instead of using "-r" option. + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) + base_content = RunShell(["svn", "cat", url], + universal_newlines=universal_newlines, + silent_ok=True) + else: + base_content, ret_code = RunShellWithReturnCode( + ["svn", "cat", self._EscapeFilename(filename)], + universal_newlines=universal_newlines) + if ret_code and status[0] == "R": + # It's a replaced file without local history (see issue208). + # The base file needs to be fetched from the server. + url = "%s/%s" % (self.svn_base, filename) + base_content = RunShell(["svn", "cat", url], + universal_newlines=universal_newlines, + silent_ok=True) + elif ret_code: + ErrorExit("Got error status from 'svn cat %s'" % filename) + if not is_binary: + args = [] + if self.rev_start: + url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) + else: + url = filename + args += ["-r", "BASE"] + cmd = ["svn"] + args + ["propget", "svn:keywords", url] + keywords, returncode = RunShellWithReturnCode(cmd) + if keywords and not returncode: + base_content = self._CollapseKeywords(base_content, keywords) + else: + StatusUpdate("svn status returned unexpected output: %s" % status) + sys.exit(1) + return base_content, new_content, is_binary, status[0:5] + + +class GitVCS(VersionControlSystem): + """Implementation of the VersionControlSystem interface for Git.""" + + def __init__(self, options): + super(GitVCS, self).__init__(options) + # Map of filename -> (hash before, hash after) of base file. + # Hashes for "no such file" are represented as None. + self.hashes = {} + # Map of new filename -> old filename for renames. + self.renames = {} + + def GetGUID(self): + revlist = RunShell("git rev-list --parents HEAD".split()).splitlines() + # M-A: Return the 1st root hash, there could be multiple when a + # subtree is merged. In that case, more analysis would need to + # be done to figure out which HEAD is the 'most representative'. + for r in revlist: + if ' ' not in r: + return r + + def PostProcessDiff(self, gitdiff): + """Converts the diff output to include an svn-style "Index:" line as well + as record the hashes of the files, so we can upload them along with our + diff.""" + # Special used by git to indicate "no such content". + NULL_HASH = "0"*40 + + def IsFileNew(filename): + return filename in self.hashes and self.hashes[filename][0] is None + + def AddSubversionPropertyChange(filename): + """Add svn's property change information into the patch if given file is + new file. + + We use Subversion's auto-props setting to retrieve its property. + See http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2 for + Subversion's [auto-props] setting. 
+      """
+      if self.options.emulate_svn_auto_props and IsFileNew(filename):
+        svnprops = GetSubversionPropertyChanges(filename)
+        if svnprops:
+          svndiff.append("\n" + svnprops + "\n")
+
+    svndiff = []
+    filecount = 0
+    filename = None
+    for line in gitdiff.splitlines():
+      match = re.match(r"diff --git a/(.*) b/(.*)$", line)
+      if match:
+        # Add auto property here for the previously seen file.
+        if filename is not None:
+          AddSubversionPropertyChange(filename)
+        filecount += 1
+        # Intentionally use the "after" filename so we can show renames.
+        filename = match.group(2)
+        svndiff.append("Index: %s\n" % filename)
+        if match.group(1) != match.group(2):
+          self.renames[match.group(2)] = match.group(1)
+      else:
+        # The "index" line in a git diff looks like this (long hashes
+        # elided):
+        #   index 82c0d44..b2cee3f 100755
+        # We want to save the left hash, as that identifies the base file.
+        match = re.match(r"index (\w+)\.\.(\w+)", line)
+        if match:
+          before, after = (match.group(1), match.group(2))
+          if before == NULL_HASH:
+            before = None
+          if after == NULL_HASH:
+            after = None
+          self.hashes[filename] = (before, after)
+      svndiff.append(line + "\n")
+    if not filecount:
+      ErrorExit("No valid patches found in output from git diff")
+    # Add auto property for the last seen file.
+    assert filename is not None
+    AddSubversionPropertyChange(filename)
+    return "".join(svndiff)
+
+  def GenerateDiff(self, extra_args):
+    extra_args = extra_args[:]
+    if self.options.revision:
+      if ":" in self.options.revision:
+        extra_args = self.options.revision.split(":", 1) + extra_args
+      else:
+        extra_args = [self.options.revision] + extra_args
+
+    # --no-ext-diff is broken in some versions of Git, so try to work around
+    # this by overriding the environment (but there is still a problem if the
+    # git config key "diff.external" is used).
+    env = os.environ.copy()
+    if "GIT_EXTERNAL_DIFF" in env:
+      del env["GIT_EXTERNAL_DIFF"]
+    # -M/-C will not print the diff for the deleted file when a file is
+    # renamed. This is confusing because the original file will not be shown
+    # on the review when a file is renamed. So, get a diff with ONLY deletes,
+    # then append a diff (with rename detection), without deletes.
+    cmd = [
+        "git", "diff", "--no-color", "--no-ext-diff", "--full-index",
+        "--ignore-submodules",
+    ]
+    diff = RunShell(
+        cmd + ["--no-renames", "--diff-filter=D"] + extra_args,
+        env=env, silent_ok=True)
+    if self.options.git_find_copies:
+      similarity_options = ["--find-copies-harder", "-l100000",
+                            "-C%s" % self.options.git_similarity]
+    else:
+      similarity_options = ["-M%s" % self.options.git_similarity]
+    diff += RunShell(
+        cmd + ["--diff-filter=AMCRT"] + similarity_options + extra_args,
+        env=env, silent_ok=True)
+
+    # Added by Kristinn.
+    if self.options.add_cache:
+      diff += RunShell(cmd + ["--cached"], env=env, silent_ok=True)
+    # The CL may consist solely of deleted files, so accept a silent (empty)
+    # diff from each command and check for a completely empty diff below.
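+    # For illustration, with default options the two main invocations above
+    # roughly amount to the following (assuming the default --git_similarity
+    # of 50):
+    #
+    #   git diff --no-color --no-ext-diff --full-index --ignore-submodules \
+    #       --no-renames --diff-filter=D          # deletions only
+    #   git diff --no-color --no-ext-diff --full-index --ignore-submodules \
+    #       --diff-filter=AMCRT -M50              # everything else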
+    if not diff:
+      ErrorExit("No output from %s" % (cmd + extra_args))
+    return diff
+
+  def GetUnknownFiles(self):
+    status = RunShell(["git", "ls-files", "--exclude-standard", "--others"],
+                      silent_ok=True)
+    return status.splitlines()
+
+  def GetFileContent(self, file_hash, is_binary):
+    """Returns the content of a file identified by its git hash."""
+    data, retcode = RunShellWithReturnCode(["git", "show", file_hash],
+                                           universal_newlines=not is_binary)
+    if retcode:
+      ErrorExit("Got error status from 'git show %s'" % file_hash)
+    return data
+
+  def GetBaseFile(self, filename):
+    hash_before, hash_after = self.hashes.get(filename, (None, None))
+    base_content = None
+    new_content = None
+    status = None
+
+    if filename in self.renames:
+      status = "A +"  # Match svn attribute name for renames.
+      if filename not in self.hashes:
+        # If a rename doesn't change the content, we never get a hash.
+        base_content = RunShell(
+            ["git", "show", "HEAD:" + filename], silent_ok=True)
+    elif not hash_before:
+      status = "A"
+      base_content = ""
+    elif not hash_after:
+      status = "D"
+    else:
+      status = "M"
+
+    is_image = self.IsImage(filename)
+    is_binary = self.IsBinaryData(base_content) or is_image
+
+    # Grab the before/after content if we need it.
+    # Grab the base content if we don't have it already.
+    if base_content is None and hash_before:
+      base_content = self.GetFileContent(hash_before, is_binary)
+    # Only include the "after" file if it's an image; otherwise it
+    # is reconstructed from the diff.
+    if is_image and hash_after:
+      new_content = self.GetFileContent(hash_after, is_binary)
+
+    return (base_content, new_content, is_binary, status)
+
+
+class CVSVCS(VersionControlSystem):
+  """Implementation of the VersionControlSystem interface for CVS."""
+
+  def __init__(self, options):
+    super(CVSVCS, self).__init__(options)
+
+  def GetGUID(self):
+    """For now we don't know how to get the repository ID for CVS."""
+    return
+
+  def GetOriginalContent_(self, filename):
+    RunShell(["cvs", "up", filename], silent_ok=True)
+    # TODO: detect the file content encoding.
+    content = open(filename).read()
+    return content.replace("\r\n", "\n")
+
+  def GetBaseFile(self, filename):
+    base_content = None
+    new_content = None
+    status = "A"
+
+    output, retcode = RunShellWithReturnCode(["cvs", "status", filename])
+    if retcode:
+      ErrorExit("Got error status from 'cvs status %s'" % filename)
+
+    # Note: str.find() returns -1 when the substring is absent, which is
+    # truthy, so every branch must compare against -1 explicitly.
+    if output.find("Status: Locally Modified") != -1:
+      status = "M"
+      temp_filename = "%s.tmp123" % filename
+      os.rename(filename, temp_filename)
+      base_content = self.GetOriginalContent_(filename)
+      os.rename(temp_filename, filename)
+    elif output.find("Status: Locally Added") != -1:
+      status = "A"
+      base_content = ""
+    elif output.find("Status: Needs Checkout") != -1:
+      status = "D"
+      base_content = self.GetOriginalContent_(filename)
+
+    return (base_content, new_content, self.IsBinaryData(base_content),
+            status)
+
+  def GenerateDiff(self, extra_args):
+    cmd = ["cvs", "diff", "-u", "-N"]
+    if self.options.revision:
+      cmd += ["-r", self.options.revision]
+
+    cmd.extend(extra_args)
+    data, retcode = RunShellWithReturnCode(cmd)
+    count = 0
+    if retcode in [0, 1]:
+      for line in data.splitlines():
+        if line.startswith("Index:"):
+          count += 1
+          logging.info(line)
+
+    if not count:
+      ErrorExit("No valid patches found in output from cvs diff")
+
+    return data
+
+  def GetUnknownFiles(self):
+    data, retcode = RunShellWithReturnCode(["cvs", "diff"])
+    if retcode not in [0, 1]:
+      ErrorExit("Got error status from 'cvs diff':\n%s" % (data,))
+    unknown_files = []
+    for line in data.split("\n"):
+      if line and line[0] == "?":
+        unknown_files.append(line)
+    return unknown_files
+
+
+class MercurialVCS(VersionControlSystem):
+  """Implementation of the VersionControlSystem interface for Mercurial."""
+
+  def __init__(self, options, repo_dir):
+    super(MercurialVCS, self).__init__(options)
+    # Absolute path to the repository (we can be in a subdir).
+    self.repo_dir = os.path.normpath(repo_dir)
+    # Compute the subdir.
+    cwd = os.path.normpath(os.getcwd())
+    assert cwd.startswith(self.repo_dir)
+    self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
+    if self.options.revision:
+      self.base_rev = self.options.revision
+    else:
+      self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()
+
+  def GetGUID(self):
+    # See the chapter "Uniquely identifying a repository" in
+    # http://hgbook.red-bean.com/read/customizing-the-output-of-mercurial.html
+    info = RunShell("hg log -r0 --template {node}".split())
+    return info.strip()
+
+  def _GetRelPath(self, filename):
+    """Get the path of a file relative to the current directory,
+    given its logical path in the repo."""
+    absname = os.path.join(self.repo_dir, filename)
+    return os.path.relpath(absname)
+
+  def GenerateDiff(self, extra_args):
+    cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
+    data = RunShell(cmd, silent_ok=True)
+    svndiff = []
+    filecount = 0
+    for line in data.splitlines():
+      m = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+      if m:
+        # Modify the line to make it look as if it comes from svn diff.
+        # With this modification no changes on the server side are required
+        # to make upload.py work with Mercurial repos.
+        # NOTE: for proper handling of moved/copied files, we have to use
+        # the second filename.
+        filename = m.group(2)
+        svndiff.append("Index: %s" % filename)
+        svndiff.append("=" * 67)
+        filecount += 1
+        logging.info(line)
+      else:
+        svndiff.append(line)
+    if not filecount:
+      ErrorExit("No valid patches found in output from hg diff")
+    return "\n".join(svndiff) + "\n"
+
+  def GetUnknownFiles(self):
+    """Return a list of files unknown to the VCS."""
+    status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
+                      silent_ok=True)
+    unknown_files = []
+    for line in status.splitlines():
+      st, fn = line.split(" ", 1)
+      if st == "?":
+        unknown_files.append(fn)
+    return unknown_files
+
+  def GetBaseFile(self, filename):
+    # "hg status" and "hg cat" both take a path relative to the current
+    # subdir, but "hg diff" has given us the path relative to the repo root.
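+    # For example (hypothetical layout): with repo_dir "/repo" and the
+    # current directory "/repo/sub", the repo-relative name "sub/a.py"
+    # becomes the working-directory-relative path "a.py" via _GetRelPath(),
+    # which is the form the "hg status" and "hg cat" calls below expect.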
+ base_content = "" + new_content = None + is_binary = False + oldrelpath = relpath = self._GetRelPath(filename) + # "hg status -C" returns two lines for moved/copied files, one otherwise + out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath]) + out = out.splitlines() + # HACK: strip error message about missing file/directory if it isn't in + # the working copy + if out[0].startswith('%s: ' % relpath): + out = out[1:] + status, _ = out[0].split(' ', 1) + if len(out) > 1 and status == "A": + # Moved/copied => considered as modified, use old filename to + # retrieve base contents + oldrelpath = out[1].strip() + status = "M" + if ":" in self.base_rev: + base_rev = self.base_rev.split(":", 1)[0] + else: + base_rev = self.base_rev + if status != "A": + base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], + silent_ok=True) + is_binary = self.IsBinaryData(base_content) + if status != "R": + new_content = open(relpath, "rb").read() + is_binary = is_binary or self.IsBinaryData(new_content) + if is_binary and base_content: + # Fetch again without converting newlines + base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], + silent_ok=True, universal_newlines=False) + if not is_binary: + new_content = None + return base_content, new_content, is_binary, status + + +class PerforceVCS(VersionControlSystem): + """Implementation of the VersionControlSystem interface for Perforce.""" + + def __init__(self, options): + + def ConfirmLogin(): + # Make sure we have a valid perforce session + while True: + data, retcode = self.RunPerforceCommandWithReturnCode( + ["login", "-s"], marshal_output=True) + if not data: + ErrorExit("Error checking perforce login") + if not retcode and (not "code" in data or data["code"] != "error"): + break + print "Enter perforce password: " + self.RunPerforceCommandWithReturnCode(["login"]) + + super(PerforceVCS, self).__init__(options) + + self.p4_changelist = options.p4_changelist + if not self.p4_changelist: + ErrorExit("A changelist id is required") + if (options.revision): + ErrorExit("--rev is not supported for perforce") + + self.p4_port = options.p4_port + self.p4_client = options.p4_client + self.p4_user = options.p4_user + + ConfirmLogin() + + if not options.title: + description = self.RunPerforceCommand(["describe", self.p4_changelist], + marshal_output=True) + if description and "desc" in description: + # Rietveld doesn't support multi-line descriptions + raw_title = description["desc"].strip() + lines = raw_title.splitlines() + if len(lines): + options.title = lines[0] + + def GetGUID(self): + """For now we don't know how to get repository ID for Perforce""" + return + + def RunPerforceCommandWithReturnCode(self, extra_args, marshal_output=False, + universal_newlines=True): + args = ["p4"] + if marshal_output: + # -G makes perforce format its output as marshalled python objects + args.extend(["-G"]) + if self.p4_port: + args.extend(["-p", self.p4_port]) + if self.p4_client: + args.extend(["-c", self.p4_client]) + if self.p4_user: + args.extend(["-u", self.p4_user]) + args.extend(extra_args) + + data, retcode = RunShellWithReturnCode( + args, print_output=False, universal_newlines=universal_newlines) + if marshal_output and data: + data = marshal.loads(data) + return data, retcode + + def RunPerforceCommand(self, extra_args, marshal_output=False, + universal_newlines=True): + # This might be a good place to cache call results, since things like + # describe or fstat might get called repeatedly. 
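+    # A minimal memoization sketch (illustrative only, not wired in; the
+    # _cache attribute is hypothetical and would be initialized in __init__):
+    #
+    #   key = (tuple(extra_args), marshal_output)
+    #   if key not in self._cache:
+    #     self._cache[key] = self.RunPerforceCommandWithReturnCode(
+    #         extra_args, marshal_output, universal_newlines)
+    #   data, retcode = self._cache[key]
+    #
+    # Cached results would have to be invalidated if the changelist is
+    # modified while the script runs.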
+ data, retcode = self.RunPerforceCommandWithReturnCode( + extra_args, marshal_output, universal_newlines) + if retcode: + ErrorExit("Got error status from %s:\n%s" % (extra_args, data)) + return data + + def GetFileProperties(self, property_key_prefix = "", command = "describe"): + description = self.RunPerforceCommand(["describe", self.p4_changelist], + marshal_output=True) + + changed_files = {} + file_index = 0 + # Try depotFile0, depotFile1, ... until we don't find a match + while True: + file_key = "depotFile%d" % file_index + if file_key in description: + filename = description[file_key] + change_type = description[property_key_prefix + str(file_index)] + changed_files[filename] = change_type + file_index += 1 + else: + break + return changed_files + + def GetChangedFiles(self): + return self.GetFileProperties("action") + + def GetUnknownFiles(self): + # Perforce doesn't detect new files, they have to be explicitly added + return [] + + def IsBaseBinary(self, filename): + base_filename = self.GetBaseFilename(filename) + return self.IsBinaryHelper(base_filename, "files") + + def IsPendingBinary(self, filename): + return self.IsBinaryHelper(filename, "describe") + + def IsBinaryHelper(self, filename, command): + file_types = self.GetFileProperties("type", command) + if not filename in file_types: + ErrorExit("Trying to check binary status of unknown file %s." % filename) + # This treats symlinks, macintosh resource files, temporary objects, and + # unicode as binary. See the Perforce docs for more details: + # http://www.perforce.com/perforce/doc.current/manuals/cmdref/o.ftypes.html + return not file_types[filename].endswith("text") + + def GetFileContent(self, filename, revision, is_binary): + file_arg = filename + if revision: + file_arg += "#" + revision + # -q suppresses the initial line that displays the filename and revision + return self.RunPerforceCommand(["print", "-q", file_arg], + universal_newlines=not is_binary) + + def GetBaseFilename(self, filename): + actionsWithDifferentBases = [ + "move/add", # p4 move + "branch", # p4 integrate (to a new file), similar to hg "add" + "add", # p4 integrate (to a new file), after modifying the new file + ] + + # We only see a different base for "add" if this is a downgraded branch + # after a file was branched (integrated), then edited. 
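+    # For a pending "p4 move", for example, the marshalled "fstat -Or"
+    # record looks roughly like this (depot paths made up):
+    #   {'depotFile': '//depot/new.c', 'action': 'move/add',
+    #    'resolveFromFile0': '//depot/old.c', ...}
+    # in which case //depot/old.c is the base file we want.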
+    if self.GetAction(filename) in actionsWithDifferentBases:
+      # -Or shows information about pending integrations/moves
+      fstat_result = self.RunPerforceCommand(["fstat", "-Or", filename],
+                                             marshal_output=True)
+
+      baseFileKey = "resolveFromFile0"  # I think it's safe to use only file0
+      if baseFileKey in fstat_result:
+        return fstat_result[baseFileKey]
+
+    return filename
+
+  def GetBaseRevision(self, filename):
+    base_filename = self.GetBaseFilename(filename)
+
+    have_result = self.RunPerforceCommand(["have", base_filename],
+                                          marshal_output=True)
+    if "haveRev" in have_result:
+      return have_result["haveRev"]
+
+  def GetLocalFilename(self, filename):
+    where = self.RunPerforceCommand(["where", filename], marshal_output=True)
+    if "path" in where:
+      return where["path"]
+
+  def GenerateDiff(self, args):
+    class DiffData:
+      def __init__(self, perforceVCS, filename, action):
+        self.perforceVCS = perforceVCS
+        self.filename = filename
+        self.action = action
+        self.base_filename = perforceVCS.GetBaseFilename(filename)
+
+        self.file_body = None
+        self.base_rev = None
+        self.prefix = None
+        self.working_copy = True
+        self.change_summary = None
+
+    def GenerateDiffHeader(diffData):
+      header = []
+      header.append("Index: %s" % diffData.filename)
+      header.append("=" * 67)
+
+      if diffData.base_filename != diffData.filename:
+        if diffData.action.startswith("move"):
+          verb = "rename"
+        else:
+          verb = "copy"
+        header.append("%s from %s" % (verb, diffData.base_filename))
+        header.append("%s to %s" % (verb, diffData.filename))
+
+      suffix = "\t(revision %s)" % diffData.base_rev
+      header.append("--- " + diffData.base_filename + suffix)
+      if diffData.working_copy:
+        suffix = "\t(working copy)"
+      header.append("+++ " + diffData.filename + suffix)
+      if diffData.change_summary:
+        header.append(diffData.change_summary)
+      return header
+
+    def GenerateMergeDiff(diffData, args):
+      # -du generates a unified diff, which is nearly svn format
+      diffData.file_body = self.RunPerforceCommand(
+          ["diff", "-du", diffData.filename] + args)
+      diffData.base_rev = self.GetBaseRevision(diffData.filename)
+      diffData.prefix = ""
+
+      # We have to replace p4's file status output (the lines starting
+      # with +++ or ---) to match svn's diff format
+      lines = diffData.file_body.splitlines()
+      first_good_line = 0
+      while (first_good_line < len(lines) and
+             not lines[first_good_line].startswith("@@")):
+        first_good_line += 1
+      diffData.file_body = "\n".join(lines[first_good_line:])
+      return diffData
+
+    def GenerateAddDiff(diffData):
+      fstat = self.RunPerforceCommand(["fstat", diffData.filename],
+                                      marshal_output=True)
+      if "headRev" in fstat:
+        diffData.base_rev = fstat["headRev"]  # Re-adding a deleted file
+      else:
+        diffData.base_rev = "0"  # Brand new file
+      diffData.working_copy = False
+      rel_path = self.GetLocalFilename(diffData.filename)
+      diffData.file_body = open(rel_path, 'r').read()
+      # Replicate svn's list of changed lines
+      line_count = len(diffData.file_body.splitlines())
+      diffData.change_summary = "@@ -0,0 +1"
+      if line_count > 1:
+        diffData.change_summary += ",%d" % line_count
+      diffData.change_summary += " @@"
+      diffData.prefix = "+"
+      return diffData
+
+    def GenerateDeleteDiff(diffData):
+      diffData.base_rev = self.GetBaseRevision(diffData.filename)
+      is_base_binary = self.IsBaseBinary(diffData.filename)
+      # For deletes, base_filename == filename
+      diffData.file_body = self.GetFileContent(diffData.base_filename,
+                                               None,
+                                               is_base_binary)
+      # Replicate svn's list of changed lines
+      line_count = len(diffData.file_body.splitlines())
+      diffData.change_summary = "@@ -1"
+      if line_count > 1:
+        diffData.change_summary += ",%d" % line_count
+      diffData.change_summary += " +0,0 @@"
+      diffData.prefix = "-"
+      return diffData
+
+    changed_files = self.GetChangedFiles()
+
+    svndiff = []
+    filecount = 0
+    for (filename, action) in changed_files.items():
+      svn_status = self.PerforceActionToSvnStatus(action)
+      if svn_status == "SKIP":
+        continue
+
+      diffData = DiffData(self, filename, action)
+      # Is it possible to diff a branched file? Stack Overflow says no:
+      # http://stackoverflow.com/questions/1771314/in-perforce-command-line-how-to-diff-a-file-reopened-for-add
+      if svn_status == "M":
+        diffData = GenerateMergeDiff(diffData, args)
+      elif svn_status == "A":
+        diffData = GenerateAddDiff(diffData)
+      elif svn_status == "D":
+        diffData = GenerateDeleteDiff(diffData)
+      else:
+        ErrorExit("Unknown file action %s (svn action %s)." %
+                  (action, svn_status))
+
+      svndiff += GenerateDiffHeader(diffData)
+
+      for line in diffData.file_body.splitlines():
+        svndiff.append(diffData.prefix + line)
+      filecount += 1
+    if not filecount:
+      ErrorExit("No valid patches found in output from p4 diff")
+    return "\n".join(svndiff) + "\n"
+
+  def PerforceActionToSvnStatus(self, status):
+    # Mirroring the list at
+    # http://permalink.gmane.org/gmane.comp.version-control.mercurial.devel/28717
+    # Is there something more official?
+    return {
+        "add": "A",
+        "branch": "A",
+        "delete": "D",
+        "edit": "M",  # Also includes changing file types.
+        "integrate": "M",
+        "move/add": "M",
+        "move/delete": "SKIP",
+        "purge": "D",  # How does a file's status become "purge"?
+    }[status]
+
+  def GetAction(self, filename):
+    changed_files = self.GetChangedFiles()
+    if filename not in changed_files:
+      ErrorExit("Trying to get base version of unknown file %s." % filename)
+
+    return changed_files[filename]
+
+  def GetBaseFile(self, filename):
+    base_filename = self.GetBaseFilename(filename)
+    base_content = ""
+    new_content = None
+
+    status = self.PerforceActionToSvnStatus(self.GetAction(filename))
+
+    if status != "A":
+      revision = self.GetBaseRevision(base_filename)
+      if not revision:
+        ErrorExit("Couldn't find base revision for file %s" % filename)
+      is_base_binary = self.IsBaseBinary(base_filename)
+      base_content = self.GetFileContent(base_filename,
+                                         revision,
+                                         is_base_binary)
+
+    is_binary = self.IsPendingBinary(filename)
+    if status != "D" and status != "SKIP":
+      relpath = self.GetLocalFilename(filename)
+      if is_binary:
+        new_content = open(relpath, "rb").read()
+
+    return base_content, new_content, is_binary, status
+
+
+# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync.
+def SplitPatch(data):
+  """Splits a patch into separate pieces for each file.
+
+  Args:
+    data: A string containing the output of svn diff.
+
+  Returns:
+    A list of 2-tuples (filename, text), where text is the svn diff output
+    pertaining to filename.
+  """
+  patches = []
+  filename = None
+  diff = []
+  for line in data.splitlines(True):
+    new_filename = None
+    if line.startswith('Index:'):
+      unused, new_filename = line.split(':', 1)
+      new_filename = new_filename.strip()
+    elif line.startswith('Property changes on:'):
+      unused, temp_filename = line.split(':', 1)
+      # When a file is modified, paths use '/' between directories; however,
+      # when a property is modified, '\' is used on Windows. Make them the
+      # same, otherwise the file shows up twice.
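+      # For instance (illustrative), a Windows property-change header like
+      #   Property changes on: foo\bar\baz.cc
+      # is normalized to "foo/bar/baz.cc" so it matches the name taken from
+      # the corresponding "Index: foo/bar/baz.cc" line.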
+      temp_filename = temp_filename.strip().replace('\\', '/')
+      if temp_filename != filename:
+        # File has property changes but no modifications, create a new diff.
+        new_filename = temp_filename
+    if new_filename:
+      if filename and diff:
+        patches.append((filename, ''.join(diff)))
+      filename = new_filename
+      diff = [line]
+      continue
+    if diff is not None:
+      diff.append(line)
+  if filename and diff:
+    patches.append((filename, ''.join(diff)))
+  return patches
+
+
+def UploadSeparatePatches(issue, rpc_server, patchset, data, options):
+  """Uploads a separate patch for each file in the diff output.
+
+  Returns a list of [patch_key, filename] for each file.
+  """
+  patches = SplitPatch(data)
+  rv = []
+  for patch in patches:
+    if len(patch[1]) > MAX_UPLOAD_SIZE:
+      print ("Not uploading the patch for " + patch[0] +
+             " because the file is too large.")
+      continue
+    form_fields = [("filename", patch[0])]
+    if not options.download_base:
+      form_fields.append(("content_upload", "1"))
+    files = [("data", "data.diff", patch[1])]
+    ctype, body = EncodeMultipartFormData(form_fields, files)
+    url = "/%d/upload_patch/%d" % (int(issue), int(patchset))
+    print "Uploading patch for " + patch[0]
+    response_body = rpc_server.Send(url, body, content_type=ctype)
+    lines = response_body.splitlines()
+    if not lines or lines[0] != "OK":
+      StatusUpdate(" --> %s" % response_body)
+      sys.exit(1)
+    rv.append([lines[1], patch[0]])
+  return rv
+
+
+def GuessVCSName(options):
+  """Helper to guess the version control system.
+
+  This examines the current directory, guesses which VersionControlSystem
+  we're using, and returns a string indicating which VCS is detected.
+
+  Returns:
+    A pair (vcs, output). vcs is a string indicating which VCS was detected
+    and is one of VCS_GIT, VCS_MERCURIAL, VCS_SUBVERSION, VCS_PERFORCE,
+    VCS_CVS, or VCS_UNKNOWN.
+    Since local perforce repositories can't be easily detected, this method
+    will only guess VCS_PERFORCE if any perforce options have been specified.
+    output is a string containing any interesting output from the vcs
+    detection routine, or None if there is nothing interesting.
+  """
+  for attribute, value in options.__dict__.iteritems():
+    if attribute.startswith("p4") and value != None:
+      return (VCS_PERFORCE, None)
+
+  def RunDetectCommand(vcs_type, command):
+    """Helper to detect VCS by executing command.
+
+    Returns:
+      A pair (vcs, output) or None. Throws exception on error.
+    """
+    try:
+      out, returncode = RunShellWithReturnCode(command)
+      if returncode == 0:
+        return (vcs_type, out.strip())
+    except OSError, (errcode, message):
+      if errcode != errno.ENOENT:  # command not found code
+        raise
+
+  # Mercurial has a command to get the base directory of a repository.
+  # Try running it, but don't die if we don't have hg installed.
+  # NOTE: we try Mercurial first as it can sit on top of an SVN working copy.
+  res = RunDetectCommand(VCS_MERCURIAL, ["hg", "root"])
+  if res != None:
+    return res
+
+  # Subversion from 1.7 has a single centralized .svn folder
+  # (see http://subversion.apache.org/docs/release-notes/1.7.html#wc-ng).
+  # That's why we use 'svn info' instead of checking for a .svn dir.
+  res = RunDetectCommand(VCS_SUBVERSION, ["svn", "info"])
+  if res != None:
+    return res
+
+  # Git has a command to test if you're in a git tree.
+  # Try running it, but don't die if we don't have git installed.
+  res = RunDetectCommand(VCS_GIT, ["git", "rev-parse",
+                                   "--is-inside-work-tree"])
+  if res != None:
+    return res
+
+  # Detect CVS repositories by running "cvs status" and checking for a zero
+  # exit code.
+  res = RunDetectCommand(VCS_CVS, ["cvs", "status"])
+  if res != None:
+    return res
+
+  return (VCS_UNKNOWN, None)
+
+
+def GuessVCS(options):
+  """Helper to guess the version control system.
+
+  This verifies any user-specified VersionControlSystem (by command line
+  or environment variable). If the user didn't specify one, this examines
+  the current directory, guesses which VersionControlSystem we're using,
+  and returns an instance of the appropriate class. Exits with an error
+  if we can't figure it out.
+
+  Returns:
+    A VersionControlSystem instance. Exits if the VCS can't be guessed.
+  """
+  vcs = options.vcs
+  if not vcs:
+    vcs = os.environ.get("CODEREVIEW_VCS")
+  if vcs:
+    v = VCS_ABBREVIATIONS.get(vcs.lower())
+    if v is None:
+      ErrorExit("Unknown version control system %r specified." % vcs)
+    (vcs, extra_output) = (v, None)
+  else:
+    (vcs, extra_output) = GuessVCSName(options)
+
+  if vcs == VCS_MERCURIAL:
+    if extra_output is None:
+      extra_output = RunShell(["hg", "root"]).strip()
+    return MercurialVCS(options, extra_output)
+  elif vcs == VCS_SUBVERSION:
+    return SubversionVCS(options)
+  elif vcs == VCS_PERFORCE:
+    return PerforceVCS(options)
+  elif vcs == VCS_GIT:
+    return GitVCS(options)
+  elif vcs == VCS_CVS:
+    return CVSVCS(options)
+
+  ErrorExit(("Could not guess version control system. "
+             "Are you in a working copy directory?"))
+
+
+def CheckReviewer(reviewer):
+  """Validate a reviewer -- either a nickname or an email address.
+
+  Args:
+    reviewer: A nickname or an email address.
+
+  Calls ErrorExit() if it is an invalid email address.
+  """
+  if "@" not in reviewer:
+    return  # Assume nickname.
+  parts = reviewer.split("@")
+  if len(parts) > 2:
+    ErrorExit("Invalid email address: %r" % reviewer)
+  assert len(parts) == 2
+  if "." not in parts[1]:
+    ErrorExit("Invalid email address: %r" % reviewer)
+
+
+def LoadSubversionAutoProperties():
+  """Returns the content of the [auto-props] section of Subversion's config
+  file as a dictionary.
+
+  Returns:
+    A dictionary whose key-value pairs correspond to the [auto-props]
+    section's key-value pairs.
+    In the following cases, returns an empty dictionary:
+    - the config file doesn't exist, or
+    - 'enable-auto-props' is not set to a true-like value in [miscellany].
+  """
+  if os.name == 'nt':
+    subversion_config = os.environ.get("APPDATA") + "\\Subversion\\config"
+  else:
+    subversion_config = os.path.expanduser("~/.subversion/config")
+  if not os.path.exists(subversion_config):
+    return {}
+  config = ConfigParser.ConfigParser()
+  config.read(subversion_config)
+  if (config.has_section("miscellany") and
+      config.has_option("miscellany", "enable-auto-props") and
+      config.getboolean("miscellany", "enable-auto-props") and
+      config.has_section("auto-props")):
+    props = {}
+    for file_pattern in config.options("auto-props"):
+      props[file_pattern] = ParseSubversionPropertyValues(
+          config.get("auto-props", file_pattern))
+    return props
+  else:
+    return {}
+
+
+def ParseSubversionPropertyValues(props):
+  """Parse the given property value, which comes from the [auto-props]
+  section, and return a list whose elements are (svn_prop_key,
+  svn_prop_value) pairs.
+
+  See the following doctest for an example.
+ + >>> ParseSubversionPropertyValues('svn:eol-style=LF') + [('svn:eol-style', 'LF')] + >>> ParseSubversionPropertyValues('svn:mime-type=image/jpeg') + [('svn:mime-type', 'image/jpeg')] + >>> ParseSubversionPropertyValues('svn:eol-style=LF;svn:executable') + [('svn:eol-style', 'LF'), ('svn:executable', '*')] + """ + key_value_pairs = [] + for prop in props.split(";"): + key_value = prop.split("=") + assert len(key_value) <= 2 + if len(key_value) == 1: + # If value is not given, use '*' as a Subversion's convention. + key_value_pairs.append((key_value[0], "*")) + else: + key_value_pairs.append((key_value[0], key_value[1])) + return key_value_pairs + + +def GetSubversionPropertyChanges(filename): + """Return a Subversion's 'Property changes on ...' string, which is used in + the patch file. + + Args: + filename: filename whose property might be set by [auto-props] config. + + Returns: + A string like 'Property changes on |filename| ...' if given |filename| + matches any entries in [auto-props] section. None, otherwise. + """ + global svn_auto_props_map + if svn_auto_props_map is None: + svn_auto_props_map = LoadSubversionAutoProperties() + + all_props = [] + for file_pattern, props in svn_auto_props_map.items(): + if fnmatch.fnmatch(filename, file_pattern): + all_props.extend(props) + if all_props: + return FormatSubversionPropertyChanges(filename, all_props) + return None + + +def FormatSubversionPropertyChanges(filename, props): + """Returns Subversion's 'Property changes on ...' strings using given filename + and properties. + + Args: + filename: filename + props: A list whose element is a (svn_prop_key, svn_prop_value) pair. + + Returns: + A string which can be used in the patch file for Subversion. + + See the following doctest for example. + + >>> print FormatSubversionPropertyChanges('foo.cc', [('svn:eol-style', 'LF')]) + Property changes on: foo.cc + ___________________________________________________________________ + Added: svn:eol-style + + LF + + """ + prop_changes_lines = [ + "Property changes on: %s" % filename, + "___________________________________________________________________"] + for key, value in props: + prop_changes_lines.append("Added: " + key) + prop_changes_lines.append(" + " + value) + return "\n".join(prop_changes_lines) + "\n" + + +def RealMain(argv, data=None): + """The real main function. + + Args: + argv: Command line arguments. + data: Diff contents. If None (default) the diff is generated by + the VersionControlSystem implementation returned by GuessVCS(). + + Returns: + A 2-tuple (issue id, patchset id). + The patchset id is None if the base files are not uploaded by this + script (applies only to SVN checkouts). + """ + options, args = parser.parse_args(argv[1:]) + if options.help: + if options.verbose < 2: + # hide Perforce options + parser.epilog = ( + "Use '--help -v' to show additional Perforce options. " + "For more help, see " + "http://code.google.com/p/rietveld/wiki/CodeReviewHelp" + ) + parser.option_groups.remove(parser.get_option_group('--p4_port')) + parser.print_help() + sys.exit(0) + + global verbosity + verbosity = options.verbose + if verbosity >= 3: + logging.getLogger().setLevel(logging.DEBUG) + elif verbosity >= 2: + logging.getLogger().setLevel(logging.INFO) + + vcs = GuessVCS(options) + + base = options.base_url + if isinstance(vcs, SubversionVCS): + # Guessing the base field is only supported for Subversion. + # Note: Fetching base files may become deprecated in future releases. 
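+    # For instance (server name made up): a working copy checked out from
+    # http://svn.example.com/repo/trunk would typically yield
+    # "http://svn.example.com/repo/trunk/" here, unless overridden with
+    # --base_url.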
+    guessed_base = vcs.GuessBase(options.download_base)
+    if base:
+      if guessed_base and base != guessed_base:
+        print "Using base URL \"%s\" from --base_url instead of \"%s\"" % \
+            (base, guessed_base)
+    else:
+      base = guessed_base
+
+  if not base and options.download_base:
+    options.download_base = True
+    logging.info("Enabled upload of base file")
+  if not options.assume_yes:
+    vcs.CheckForUnknownFiles()
+  if data is None:
+    data = vcs.GenerateDiff(args)
+  data = vcs.PostProcessDiff(data)
+  if options.print_diffs:
+    print "Rietveld diff start:*****"
+    print data
+    print "Rietveld diff end:*****"
+  files = vcs.GetBaseFiles(data)
+  if verbosity >= 1:
+    print "Upload server:", options.server, "(change with -s/--server)"
+  if options.use_oauth2:
+    options.save_cookies = False
+  rpc_server = GetRpcServer(options.server,
+                            options.email,
+                            options.host,
+                            options.save_cookies,
+                            options.account_type,
+                            options.use_oauth2,
+                            options.oauth2_port,
+                            options.open_oauth2_local_webbrowser)
+  form_fields = []
+
+  repo_guid = vcs.GetGUID()
+  if repo_guid:
+    form_fields.append(("repo_guid", repo_guid))
+  if base:
+    b = urlparse.urlparse(base)
+    username, netloc = urllib.splituser(b.netloc)
+    if username:
+      logging.info("Removed username from base URL")
+      base = urlparse.urlunparse((b.scheme, netloc, b.path, b.params,
+                                  b.query, b.fragment))
+    form_fields.append(("base", base))
+  if options.issue:
+    form_fields.append(("issue", str(options.issue)))
+  if options.email:
+    form_fields.append(("user", options.email))
+  if options.reviewers:
+    for reviewer in options.reviewers.split(','):
+      CheckReviewer(reviewer)
+    form_fields.append(("reviewers", options.reviewers))
+  if options.cc:
+    for cc in options.cc.split(','):
+      CheckReviewer(cc)
+    form_fields.append(("cc", options.cc))
+
+  # Process --message, --title and --file.
+  message = options.message or ""
+  title = options.title or ""
+  if options.file:
+    if options.message:
+      ErrorExit("Can't specify both message and message file options")
+    file = open(options.file, 'r')
+    message = file.read()
+    file.close()
+  if options.issue:
+    prompt = "Title describing this patch set: "
+  else:
+    prompt = "New issue subject: "
+  title = (
+      title or message.split('\n', 1)[0].strip() or raw_input(prompt).strip())
+  if not title and not options.issue:
+    ErrorExit("A non-empty title is required for a new issue")
+  # For existing issues, it's fine to give a patchset an empty name. Rietveld
+  # doesn't accept that, so use a whitespace.
+  title = title or " "
+  if len(title) > 100:
+    title = title[:99] + '…'
+  if title and not options.issue:
+    message = message or title
+
+  form_fields.append(("subject", title))
+  # If it's a new issue send message as description. Otherwise a new
+  # message is created below on upload_complete.
+  if message and not options.issue:
+    form_fields.append(("description", message))
+
+  # Send a hash of all the base files so the server can determine if a copy
+  # already exists in an earlier patchset.
+  base_hashes = ""
+  for file, info in files.iteritems():
+    if info[0] is not None:
+      checksum = md5(info[0]).hexdigest()
+      if base_hashes:
+        base_hashes += "|"
+      base_hashes += checksum + ":" + file
+  form_fields.append(("base_hashes", base_hashes))
+  if options.private:
+    if options.issue:
+      print "Warning: Private flag ignored when updating an existing issue."
+ else: + form_fields.append(("private", "1")) + if options.send_patch: + options.send_mail = True + if not options.download_base: + form_fields.append(("content_upload", "1")) + if len(data) > MAX_UPLOAD_SIZE: + print "Patch is large, so uploading file patches separately." + uploaded_diff_file = [] + form_fields.append(("separate_patches", "1")) + else: + uploaded_diff_file = [("data", "data.diff", data)] + ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file) + response_body = rpc_server.Send("/upload", body, content_type=ctype) + patchset = None + if not options.download_base or not uploaded_diff_file: + lines = response_body.splitlines() + if len(lines) >= 2: + msg = lines[0] + patchset = lines[1].strip() + patches = [x.split(" ", 1) for x in lines[2:]] + else: + msg = response_body + else: + msg = response_body + StatusUpdate(msg) + if not response_body.startswith("Issue created.") and \ + not response_body.startswith("Issue updated."): + sys.exit(0) + issue = msg[msg.rfind("/")+1:] + + if not uploaded_diff_file: + result = UploadSeparatePatches(issue, rpc_server, patchset, data, options) + if not options.download_base: + patches = result + + if not options.download_base: + vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files) + + payload = {} # payload for final request + if options.send_mail: + payload["send_mail"] = "yes" + if options.send_patch: + payload["attach_patch"] = "yes" + if options.issue and message: + payload["message"] = message + payload = urllib.urlencode(payload) + rpc_server.Send("/" + issue + "/upload_complete/" + (patchset or ""), + payload=payload) + return issue, patchset + + +def main(): + try: + logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:" + "%(lineno)s %(message)s ")) + os.environ['LC_ALL'] = 'C' + RealMain(sys.argv) + except KeyboardInterrupt: + print + StatusUpdate("Interrupted.") + sys.exit(1) + + +if __name__ == "__main__": + main()
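+
+# Example invocations (illustrative; the server, reviewer and revision
+# values are made up -- run "upload.py --help" for the full option list):
+#
+#   upload.py -s codereview.example.com -r reviewer@example.com \
+#       -m "Fix SVN status parsing" --rev HEAD^
+#   upload.py -s codereview.example.com --issue 12345 --send_mail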