#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
-----------------------------------------------------------------------------

 Vinetto : a forensics tool to examine Thumbs.db files
 Copyright (C) 2005, 2006 by Michel Roukine

This file is part of Vinetto.

 Vinetto is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as published
 by the Free Software Foundation; either version 2 of the License, or (at
 your option) any later version.

 Vinetto is distributed in the hope that it will be
 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 You should have received a copy of the GNU General Public License along
 with the vinetto package; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

-----------------------------------------------------------------------------
"""

__major__ = "0"
__minor__ = "8"
__micro__ = "0"
__maintainer__ = "Keven L. Ates"
__author__ = "Michel Roukine"
__location__ = "https://github.com/AtesComp/Vinetto"

import argparse
import errno
import os
import sys
from binascii import unhexlify, hexlify
from io import BytesIO
from io import StringIO
from struct import unpack
from time import ctime

from pkg_resources import resource_filename

import vinetto.vinreport
from vinetto.vinutils import addCatEntry, nbCatEnt, nbTNstr, TNfname, TNfname2, \
                     catIndxOutOfSeq, tnStreamOutOfSeq, addTNStream, extractStats

# Python 2/3 compatibility: when the interpreter has no "unicode" builtin
# (Python >= 3), make the name an alias of "str".
try:
    unicode
except NameError:
    unicode = str

# Program name shown in help/version text: this file's base name, capitalized.
PROG = os.path.split(__file__)[1].capitalize()

# Textual names indexed by directory entry type number.
PPS_TYPES = [
    "undefined",
    "storage",
    "stream",
    "undefined",
    "undefined",
    "root",
]

# Sub-directory of the output directory used for numbered thumbnails.
THUMBS_SUBDIR = ".thumbs"


def getargs():
    # Parse the command line and return the run options as the tuple
    #   (thumbfile, outdir, htmlrep, encoding, quiet, symlinks)
    # where outdir is None unless -o was given and the four flags are
    # booleans (argparse "store_true" options).
    #
    # Invalid option combinations are reported through parser.error(), which
    # prints the usage message and terminates the program.

    version = __major__ + "." + __minor__ + "." + __micro__
    descstr = PROG + " - The Thumbnail File Parser"
    epilogstr = ("--- " + PROG + " " + version + " ---\n" +
                 "Based on the original Vinetto by " + __author__ + "\n" +
                 "Updated by " + __maintainer__ + "\n" +
                 PROG + " is open source software\n" +
                 "  See: " + __location__)

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, description=descstr, epilog=epilogstr)
    parser.add_argument("--version", action="version", version=epilogstr)
    parser.add_argument("-o", dest="outdir",
                        help="write thumbnails to DIR", metavar="DIR")
    parser.add_argument("-H", action="store_true", dest="htmlrep",
                        help="write html report to DIR (requires option -o)")
    parser.add_argument("-U", action="store_true", dest="encoding",
                        help="use utf8 encodings")
    parser.add_argument("-q", action="store_true", dest="quiet",
                        help="quiet output")
    parser.add_argument("-s", action="store_true", dest="symlinks",
                        help=("create symlink from the image realname to the numbered name\n" +
                              "in DIR/" + THUMBS_SUBDIR + " " + "(requires option -o)\n" +
                              "NOTE: A Catalog containing the realname must exist for this\n" +
                              "      option to produce results"))
    # "thumbfile" is a required positional: argparse itself rejects a command
    # line without it, so no separate "missing file" check is needed here.
    parser.add_argument("thumbfile",
                        help="an input thumbnail file, like \"Thumb.db\"")
    pargs = parser.parse_args()

    # -H and -s both write below the output directory, so they need -o.
    if pargs.outdir is None:
        if pargs.htmlrep:
            parser.error("-H option requires -o with a directory name")
        if pargs.symlinks:
            parser.error("-s option requires -o with a directory name")

    return (pargs.thumbfile, pargs.outdir, pargs.htmlrep, pargs.encoding, pargs.quiet, pargs.symlinks)


def getencodings():
    # Name of the output text encoding: "utf8" when the global utf8encoding
    # flag is set, "iso-8859-1" otherwise.
    return "utf8" if utf8encoding else "iso-8859-1"


def encodefilename(originame):
    # Decode the raw UTF-16-LE name, then re-encode it with the encoding
    # selected by getencodings(); characters that encoding cannot represent
    # are substituted ("replace" error handler).
    decoded = unicode(originame, "utf-16-le")
    return decoded.encode(getencodings(), "replace")


def conv2pytime(win32filetime):
    # Turn a win32 timestamp into a python (seconds based) time value.
    # A timestamp of 0 is passed through as 0 instead of being converted.
    if win32filetime == 0:
        return 0

    ticks_per_second = 10000000      # win32 ticks are 100ns long
    epoch_gap_seconds = 11644473600  # seconds between the two epochs
    whole_seconds = win32filetime // ticks_per_second
    return whole_seconds - epoch_gap_seconds


def nextBlock(TDB, Table, indx):
    # Look up the successor of block "indx" in an allocation table.
    #   TDB   : raw file contents
    #   Table : list of the block numbers that hold the table itself
    #   indx  : block whose table entry is wanted
    # Each table block is addressed as 128 entries of 4 bytes; block n starts
    # at file offset 0x200 + n * 512.  The entry is decoded as a signed
    # 32-bit integer using the global byte order tDB_endian.
    tableNo, slot = divmod(indx, 128)
    position = 0x200 + Table[tableNo] * 512 + slot * 4
    (successor,) = unpack(tDB_endian + "l", TDB[position:position + 4])
    return successor


def printBlock(name, pps_color, pps_PDID, pps_NDID, pps_SDID, pps_CID, pps_userflags,
               pps_tsCreate, pps_tsModify, pps_SID_firstSecDir, pps_SID_sizeDir):
    # Print the fields of one directory entry to stdout, one labelled line
    # per field.
    #   pps_color                  : truthy is shown as "Black", falsy as "Red"
    #   pps_CID, pps_userflags     : raw bytes, shown as hexadecimal digits
    #   pps_tsCreate, pps_tsModify : win32 timestamps, shown through ctime()
    #   remaining pps_* arguments  : integers
    print("          Name: %s" % name)
    print("         Color: %s" % ("Black" if pps_color else "Red"))
    print("   Prev Dir ID: %d" % pps_PDID)
    print("   Next Dir ID: %d" % pps_NDID)
    print("   Sub  Dir ID: %d" % pps_SDID)
    # hexlify() returns bytes on Python 3; decode so the hex digits are
    # printed instead of the b'...' representation.
    print("      Class ID: %s" % hexlify(pps_CID).decode("ascii"))
    print("    User Flags: %s" % hexlify(pps_userflags).decode("ascii"))
    print("        Create: " + ctime(conv2pytime(pps_tsCreate)) )
    print("        Modify: " + ctime(conv2pytime(pps_tsModify)) )
    print("       1st Sec: %d" % pps_SID_firstSecDir)
    print("          Size: %d" % pps_SID_sizeDir)


def symlink_force(target, link_name):
    # Create the symlink link_name -> target.  When link_name already exists
    # it is removed and the link is created again; any other failure is
    # reported on stderr and ends the program with exit status 18.
    try:
        os.symlink(target, link_name)
    except OSError as err:
        if err.errno != errno.EEXIST:
            sys.stderr.write(" Error: Cannot create symlink %s to image %s\n" % (link_name, target))
            sys.exit(18)
        os.remove(link_name)
        os.symlink(target, link_name)


# Beginning ...
tDBfname, outputdir, htmlrep, utf8encoding, quiet, symlinks = getargs()

# Testing thumbfile parameter...
# Every failed check is reported on stderr and exits with status 10.
if not os.access(tDBfname, os.F_OK):
    sys.stderr.write(" Error: " + tDBfname + " does not exist\n")
    sys.exit(10)
elif not os.path.isfile(tDBfname):
    sys.stderr.write(" Error: " + tDBfname + " not a file\n")
    sys.exit(10)
elif not os.access(tDBfname, os.R_OK):
    sys.stderr.write(" Error: " + tDBfname + " not readable\n")
    sys.exit(10)

# Opening Thumbs.db file
# The whole file is read into memory; the "with" block closes the handle
# as soon as the read is done.
with open(tDBfname, "rb") as tDBfile:
    thumbsDB = tDBfile.read()
longueur = len(thumbsDB)
if (longueur % 512 ) != 0:
    sys.stderr.write(" Warning: Length of %s == %d, non multiple 512\n" % (tDBfname, longueur))

# Get MD5 of Thumbs.db file
md5tDB = ""
try:
    # Python >= 2.5
    from hashlib import md5
    md5tDB = md5(thumbsDB).hexdigest()
except ImportError:
    # Python < 2.5
    # Only a missing hashlib selects this fallback; any other failure is
    # allowed to surface instead of being hidden behind a second import.
    import md5
    md5tDB = md5.new(thumbsDB).hexdigest()
# Drop the temporary name (function or module, depending on the branch).
del md5

# Initializing extraction and optional html report
# Everything below is only needed when an output directory was requested.
if outputdir is not None:
    # Testing DIR parameter...
    # Every failed check is reported on stderr and exits with status 11.
    if not os.path.exists(outputdir):
        try:
            os.mkdir(outputdir)
        except EnvironmentError:
            sys.stderr.write(" Error: Cannot create %s\n" % outputdir)
            sys.exit(11)
        else:
            # Kept out of the try body so that only a failing mkdir is
            # reported as "Cannot create".
            print(" Info: " + outputdir + " was created")
    elif not os.path.isdir(outputdir):
        sys.stderr.write(" Error: %s is not a directory\n" % outputdir)
        sys.exit(11)
    elif not os.access(outputdir, os.W_OK):
        sys.stderr.write(" Error: %s not writable\n" % outputdir)
        sys.exit(11)
    # Later code builds paths by plain concatenation onto outputdir.
    outputdir += "/"

    # PIL is optional; PIL_FOUND records whether Image can be used later.
    PIL_FOUND = True
    try:
        from PIL import Image
    except ImportError:
        PIL_FOUND = False
        sys.stderr.write("\n" +
                         " Warning: Cannot find PIL Package Image module.\n" +
                         "          Vinetto will only extract Type 2 thumbnails.\n" +
                         "\n")

    # Binary fragments shipped with the package; they are spliced together
    # with stream data when a Type 1 thumbnail is rebuilt.
    with open(resource_filename("vinetto", "data/header"), "rb") as resfile:
        header = resfile.read()
    with open(resource_filename("vinetto", "data/quantization"), "rb") as resfile:
        quantization = resfile.read()
    with open(resource_filename("vinetto", "data/huffman"), "rb") as resfile:
        huffman = resfile.read()

    if htmlrep:
        report = vinetto.vinreport.HtRep(tDBfname, outputdir, getencodings(),
                                 (__major__ + "." + __minor__ + "." + __micro__))
        report.SetFileSection(longueur, md5tDB)

# -----------------------------------------------------------------------------
# Analyzing header block ...
# The first 512 bytes of the file are the OLE2 compound-file header; fields
# are decoded from fixed offsets.

sigOLE =      bytearray(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") # Standard Sig for OLE2
sigOLE_Beta = bytearray(b"\x0e\x11\xfc\x0d\xd0\xcf\x11\xe0") # Older Beta Sig for OLE2
if (thumbsDB[0x00:0x08] != sigOLE) and (thumbsDB[0x00:0x08] != sigOLE_Beta):
    sys.stderr.write(" Error: Header Signature not found in %s\n" % tDBfname)
    sys.exit(12)

tDB_GUID = thumbsDB[0x08:0x08+16]
# Revision and version are always decoded little-endian: they precede the
# byte-order mark.
tDB_revisionNo = unpack("<h", thumbsDB[0x18:0x18+2])[0]
tDB_versionNo = unpack("<h", thumbsDB[0x1a:0x1a+2])[0]

# Byte-order mark; every later field is decoded with tDB_endian.
tDB_endianOrder = thumbsDB[0x1c:0x1c+2] # fffe=65534 OR feff=65279
tDB_endian = "<"
if (tDB_endianOrder == bytearray(b"\xff\xfe")):
    tDB_endian = ">"
#elif (tDB_endianOrder == bytearray(b"\xfe\xff")):
#    tDB_endian = "<"

# NOTE(review): the compound-file specification stores these two fields as
# power-of-two exponents (9 -> 512 bytes); they are not used further below.
tDB_sectorSize = unpack(tDB_endian+"h", thumbsDB[0x1e:0x1e+2])[0]
tDB_sectorSizeMini = unpack(tDB_endian+"h", thumbsDB[0x20:0x20+2])[0]

reserved = unpack(tDB_endian+"h", thumbsDB[0x22:0x22+2])[0]
reserved = unpack(tDB_endian+"l", thumbsDB[0x24:0x24+4])[0]
reserved = unpack(tDB_endian+"l", thumbsDB[0x28:0x28+4])[0]

tDB_SID_totalSecSAT = unpack(tDB_endian+"l", thumbsDB[0x2c:0x2c+4])[0]

tDB_SID_firstSecDir = unpack(tDB_endian+"l", thumbsDB[0x30:0x30+4])[0] #Root directory 1st block

reserved = unpack(tDB_endian+"l", thumbsDB[0x34:0x34+4])[0]

tDB_streamMinSize = unpack(tDB_endian+"l", thumbsDB[0x38:0x38+4])[0]

tDB_SID_firstSecSSAT = unpack(tDB_endian+"l", thumbsDB[0x3c:0x3c+4])[0]

tDB_SID_totalSecSSAT = unpack(tDB_endian+"l", thumbsDB[0x40:0x40+4])[0]

tDB_SID_firstSecMSAT = unpack(tDB_endian+"l", thumbsDB[0x44:0x44+4])[0]

tDB_SID_totalSecMSAT = unpack(tDB_endian+"l", thumbsDB[0x48:0x48+4])[0]

# Collect the block numbers of all SAT sectors.
# The header has room for the first 109 of them (0x4c .. 0x1ff) only ...
SATblocks = []
for i in range(min(tDB_SID_totalSecSAT, 109)):
    offset = 0x4c + (i * 4)
    SATblocks.append(unpack(tDB_endian+"l", thumbsDB[offset:offset + 4])[0])

# ... any further ones are listed in the chain of MSAT sectors: 127 entries
# per sector, with the last 4 bytes of each sector naming the next MSAT
# sector.  A negative sector number ends the chain; the visited set stops
# the walk on a corrupt (cyclic) chain.
msatSector = tDB_SID_firstSecMSAT
msatVisited = set()
while ((len(SATblocks) < tDB_SID_totalSecSAT) and (msatSector >= 0)
       and (msatSector not in msatVisited)):
    msatVisited.add(msatSector)
    msatBase = 0x200 + msatSector * 0x200
    for i in range(127):
        if len(SATblocks) >= tDB_SID_totalSecSAT:
            break
        offset = msatBase + (i * 4)
        SATblocks.append(unpack(tDB_endian+"l", thumbsDB[offset:offset + 4])[0])
    msatSector = unpack(tDB_endian+"l", thumbsDB[msatBase + 0x1fc:msatBase + 0x200])[0]

# -----------------------------------------------------------------------------
# Analyzing Root Entry directory ...

def _followChain(firstBlock):
    # List the block numbers of a SAT chain, starting at firstBlock and
    # stopping at the end-of-chain marker (-2).
    chain = []
    block = firstBlock
    while block != -2:
        chain.append(block)
        block = nextBlock(thumbsDB, SATblocks, block)
    return chain


# Blocks that hold the SSAT (the allocation table of the mini blocks).
SSATblocks = _followChain(tDB_SID_firstSecSSAT)

# The first directory block starts with the root entry; the 4 bytes at
# offset 0x74 of that entry give the first block of the mini-block stream.
currentBlock = tDB_SID_firstSecDir
offset = 0x200 + currentBlock * 0x200
firstSSATstreamBlock = unpack(tDB_endian+"l", thumbsDB[offset+0x74:offset+0x78])[0]

# Blocks that hold the mini-block stream itself.
SSATstreamBlocks = _followChain(firstSSATstreamBlock)

# Walk the chain of directory blocks, print every stream / root entry and
# extract the thumbnail streams.
SID = 0
strSep = " ------------------------------------------------------"
if (not quiet):
    print(strSep)
    print(" File: %s" % tDBfname)
    print("   MD5: %s" % md5tDB)
    print(strSep)
while (currentBlock != -2):
    # One 0x200-byte directory block holds four 0x80-byte entries.
    offset = 0x200 + currentBlock * 0x200
    for i in range(offset, offset+0x200, 0x80):
        pps_nameDir         = thumbsDB[i+0x00:i+0x40]
        pps_nameDirSize     = unpack(tDB_endian+"h", thumbsDB[i+0x40:i+0x42])[0]
        pps_type            = unpack("b",            thumbsDB[i+0x42:i+0x43])[0]
        pps_color           = unpack("?",            thumbsDB[i+0x43:i+0x44])[0]
        pps_PDID            = unpack(tDB_endian+"l", thumbsDB[i+0x44:i+0x48])[0]
        pps_NDID            = unpack(tDB_endian+"l", thumbsDB[i+0x48:i+0x4c])[0]
        pps_SDID            = unpack(tDB_endian+"l", thumbsDB[i+0x4c:i+0x50])[0]
        pps_CID             = thumbsDB[i+0x50:i+0x60]
        pps_userflags       = thumbsDB[i+0x60:i+0x64]
        pps_tsCreate        = unpack(tDB_endian+"Q", thumbsDB[i+0x64:i+0x6c])[0]
        pps_tsModify        = unpack(tDB_endian+"Q", thumbsDB[i+0x6c:i+0x74])[0]
        pps_SID_firstSecDir = unpack(tDB_endian+"l", thumbsDB[i+0x74:i+0x78])[0]
        pps_SID_sizeDir     = unpack(tDB_endian+"l", thumbsDB[i+0x78:i+0x7c])[0]

        # pps_type: Stream Types
        #   0x00 empty
        #   0x01 storage
        #   0x02 stream
        #   0x03 lock bytes
        #   0x04 property
        #   0x05 root storage

        # The stored name size is in bytes and includes the terminator:
        # halve it and drop one character.
        rawname = unicode(pps_nameDir, "utf-16-le")[0:(pps_nameDirSize // 2 - 1)]
        if (pps_type == 2): # stream files extraction
            if (not quiet):
                print(" Stream Entry\n --------------------")
                printBlock(rawname, pps_color, pps_PDID, pps_NDID, pps_SDID, pps_CID, pps_userflags,
                           pps_tsCreate, pps_tsModify, pps_SID_firstSecDir, pps_SID_sizeDir)

            # Short stream names are the thumbnail number written backwards.
            SIDstr = rawname[::-1]
            if (len(SIDstr) < 4):
                SIDstr = "%04i" % int(SIDstr)
                # Numbered thumbnails go below DIR/.thumbs; only touch the
                # file system when an output directory was given (outputdir
                # is None otherwise and cannot be concatenated).
                if (outputdir is not None) and not os.path.exists(outputdir + THUMBS_SUBDIR):
                    try:
                        os.mkdir(outputdir + THUMBS_SUBDIR)
                    except EnvironmentError:
                        sys.stderr.write(" Error: Cannot create %s\n" % (outputdir + THUMBS_SUBDIR))
                        sys.exit(13)

            # Collect the stream contents into sr by following its chain.
            # bytesToWrite is the number of bytes still wanted; clamping the
            # chunk length to 0 keeps a chain that is longer than the
            # declared size from producing a negative slice end.
            bytesToWrite = pps_SID_sizeDir
            sr = bytearray(b"")

            if (pps_SID_sizeDir >= 4096): # stream located in the SAT
                currentStreamBlock = pps_SID_firstSecDir
                while (currentStreamBlock != -2):
                    sOffset = 0x200 + currentStreamBlock * 0x200
                    sr += thumbsDB[sOffset:sOffset + min(512, max(bytesToWrite, 0))]
                    bytesToWrite = bytesToWrite - 512
                    currentStreamBlock = nextBlock(thumbsDB, SATblocks, currentStreamBlock)

            else:                # stream located in the SSAT
                currentStreamMiniBlock = pps_SID_firstSecDir
                while (currentStreamMiniBlock != -2):
                    # Computing offset of the miniBlock to copy
                    # 1 : Which block of the SSATstream?
                    nb = currentStreamMiniBlock // 8
                    # 2 : Where is this block?
                    bl = SSATstreamBlocks[nb]
                    # 3 : Which offset from the start of block?
                    ioffset = (currentStreamMiniBlock % 8) * 64

                    sOffset = 0x200 + bl*0x200 + ioffset

                    sr += thumbsDB[sOffset:sOffset + min(64, max(bytesToWrite, 0))]
                    bytesToWrite = bytesToWrite - 64
                    # Computing next currentStreamMiniBlock
                    currentStreamMiniBlock = nextBlock(thumbsDB, SSATblocks, currentStreamMiniBlock)

            # Extraction stream processing ... ---------------------------------

            longueur = len(sr)

            # Is this a Catalog?
            if (rawname == "Catalog"):
                # -------------------------------------------------------------
                # Skipping catalog header block ...

                recordLen = unpack(tDB_endian+"h", sr[0:2])[0]
                indcat = recordLen
                # The Catalog is not a thumbnail: undo the SID increment
                # that is applied at the end of this entry.
                SID = SID - 1

                # -------------------------------------------------------------
                # Analyzing Catalog entries ...

                while (indcat < longueur):
                    recordLen   = unpack(tDB_endian+"h", sr[indcat   :indcat+2])[0]
                    if (recordLen <= 0):
                        # A non-positive length would never advance indcat.
                        sys.stderr.write(" Warning: Invalid Catalog record length at offset %d in %s\n"
                                         % (indcat, tDBfname))
                        break
                    num         = unpack(tDB_endian+"l", sr[indcat+4 :indcat+8])[0]
                    timestamp   = unpack(tDB_endian+"Q", sr[indcat+8 :indcat+16])[0]
                    nameLen     = recordLen - 0x14

                    originame   = sr[indcat+16 :indcat+16+nameLen]
                    TNid = "%04i" % num
                    TNtimestamp = ctime(conv2pytime(timestamp))
                    TNname = encodefilename(originame)
                    # encodefilename() returns encoded bytes.  On Python 3
                    # bytes cannot be concatenated with str, so a text form
                    # is used for the symlink path and the listing (on
                    # Python 2 the value already is a str).
                    if isinstance(TNname, str):
                        TNtext = TNname
                    else:
                        TNtext = TNname.decode(getencodings(), "replace")
                    if (symlinks):
                        symlink_force(outputdir + THUMBS_SUBDIR + "/" + TNid + ".jpg",
                                      outputdir + TNtext)
                    print(" " + TNid + " " + TNtimestamp + " " + TNtext)
                    # NOTE(review): addCatEntry() still receives the encoded
                    # value exactly as before -- confirm which type vinutils
                    # expects on Python 3.
                    addCatEntry(num, TNtimestamp, TNname)
                    indcat = indcat + recordLen

            else:
                # Is EOI at end of stream?
                if (sr[longueur-2:longueur] != bytearray(b"\xff\xd9")): # Not End Of Image (EOI)
                    sys.stderr.write(" Error: Missing End of Image (EOI) marker in stream %d\n" % SID)
                    sys.exit(14)
                # --------------------------- Header 1 ------------------------
                # Get file offset
                headOffset = unpack(tDB_endian+"l", sr[0:4])[0]
                headRevision = unpack(tDB_endian+"l", sr[4:8])[0]

                # Is length OK?
                if (unpack(tDB_endian+"H", sr[8:10])[0] != (longueur - headOffset)):
                    sys.stderr.write(" Error: Header 1 length mismatch in stream %d\n" % SID)
                    sys.exit(15)
                # --------------------------- header 2 ------------------------
                # Is it a type 2 thumbnail? (full jpeg)
                if (sr[headOffset:headOffset+4] == bytearray(b"\xff\xd8\xff\xe0")):
                    if (outputdir is not None):
                        if (len(SIDstr) < 4):
                            fname = THUMBS_SUBDIR + "/" + TNfname(SIDstr, "2")
                        else:
                            fname = TNfname2(rawname, "2")
                        # The payload after header 1 is written out verbatim.
                        with open(outputdir + fname + ".jpg", "wb") as tnfile:
                            tnfile.write(sr[headOffset:])

                elif (unpack(tDB_endian+"L", sr[headOffset:headOffset+4])[0] == 1):
                    # Is second header OK?
                    if (unpack(tDB_endian+"H", sr[headOffset+4:headOffset+6])[0] != (longueur - headOffset - 0x10)):
                        sys.stderr.write(" Error: Header 2 length mismatch in stream %d\n" % SID)
                        sys.exit(16)
                    if (outputdir is not None):
                        # Type 1 TN processing ...
                        if (PIL_FOUND):
                            # Rebuild a decodable image from the packaged
                            # header / quantization / huffman fragments and
                            # the stream data.
                            type1sr = header[:0x14] + quantization + sr[0x1e:0x34] + huffman + sr[0x34:]

                            # The image data is binary, so it is wrapped in
                            # a BytesIO for PIL.
                            im = Image.open(BytesIO(bytes(type1sr)))
                            # Swap the first and third channels, drop the
                            # fourth, then flip the image vertically.
                            r, g, b, a = im.split()
                            im = Image.merge("RGB", (b, g, r))
                            im = im.transpose(Image.FLIP_TOP_BOTTOM)
                            # NOTE(review): unlike Type 2, numbered Type 1
                            # files are not placed below THUMBS_SUBDIR --
                            # confirm whether that is intended.
                            if (len(SIDstr) < 4):
                                fname = TNfname(SIDstr, "1")
                            else:
                                fname = TNfname2(rawname, "1")
                            im.save(outputdir + fname + ".jpg", "JPEG", quality=100)
                        else: # Cannot extract : PIL Image not imported
                            addTNStream(int(SIDstr), "1", "")
                else:
                    sys.stderr.write(" Error: Header 2 not found in stream %d\n" % SID)
                    sys.exit(17)

            if (not quiet):
                print(strSep)
            # -----------------------------------------------------------------

        elif pps_type == 5: # Root Entry
            if (not quiet):
                print(" Root Entry\n --------------------")
                printBlock(rawname, pps_color, pps_PDID, pps_NDID, pps_SDID, pps_CID, pps_userflags,
                           pps_tsCreate, pps_tsModify, pps_SID_firstSecDir, pps_SID_sizeDir)
            if htmlrep:
                report.SetRE(pps_color, pps_PDID, pps_NDID, pps_SDID, pps_CID, pps_userflags,
                             ctime(conv2pytime(pps_tsCreate)), ctime(conv2pytime(pps_tsModify)),
                             pps_SID_firstSecDir, pps_SID_sizeDir)
            if (not quiet):
                print(strSep)

        # Counted for every directory entry, whatever its type.
        SID = SID + 1

    currentBlock = nextBlock(thumbsDB, SATblocks, currentBlock)

# Final consistency notes, statistics and report flush.
if catIndxOutOfSeq() == True:
    sys.stderr.write(" Info: %s Catalog : index number out of usual sequence\n" % tDBfname)

if tnStreamOutOfSeq() == True:
    sys.stderr.write(" Info: %s : thumbnail stream index number out of usual sequence\n" % tDBfname)

if outputdir is not None:
    catCount = nbCatEnt()
    if catCount > 0:
        # A Catalog exists: only complain when its entry count differs from
        # the number of extracted thumbnail streams.
        if catCount != nbTNstr():
            sys.stderr.write(" Warning: %s --> Counts: Extracted != Catalog\n" % tDBfname)
    else:
        sys.stderr.write(" Info: %s --> No Catalog\n" % tDBfname)

statstring = extractStats(outputdir)
print(statstring)

# "report" only exists when -H was given (which in turn requires -o).
if htmlrep:
    report.flush(statstring)

