#!/usr/bin/env python

#Identify false image files, for example a file that has a gif header and a php body
#author: stack@inventati.org
#copyleft: 2013-01-29

import os
import sys
from PIL import Image
import imghdr

#Log wrong image files found
def log(path, fd):
    """Write *path* followed by a newline to the open file object *fd*."""
    entry = path + "\n"
    fd.write(entry)

#Echo a message if verbose is enabled
def verbose(message):
    """Print *message* unless the module-level ``debug`` flag is None."""
    # ``debug`` is a global assigned by the script entry point.
    if debug is None:
        return
    print(message)

#Check that a file is not a fake image: False only when the header looks like an image but PIL cannot open it
def checkfimage(fpath):
    """Tell whether *fpath* is free of the "fake image" symptom.

    Returns False only when imghdr recognises an image header in the file
    but PIL fails to open it. Returns True otherwise, which includes files
    that imghdr does not recognise as images at all.
    """
    # imghdr is fooled by false images, so it is only used to decide whether
    # the file claims to be an image in the first place.
    if not imghdr.what(fpath):
        return True
    try:
        # PIL Image is not fooled. The context manager closes the underlying
        # file so a long directory scan does not accumulate open handles.
        with Image.open(fpath):
            pass
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must still be able to stop the scan.
        return False
    return True

#Generate the file paths to traverse, or a single path if a file name was given
def getfiles(path):
    """Yield the file paths to check under *path*.

    If *path* is a directory, it is walked recursively with os.walk and the
    joined path of every file found is yielded. Otherwise *path* itself is
    yielded as the single item.
    """
    # Use the argument rather than sys.argv[1] so the generator works for any
    # caller, not only for the command-line entry point.
    if os.path.isdir(path):
        for root, _dirs, files in os.walk(path):
            for name in files:
                yield os.path.join(root, name)
    else:
        yield path

# Script entry point: scan argv[1], write the paths of false images to the
# logfile argv[2], and print how many files were checked.
if __name__ == "__main__":
    # path and logfile are mandatory; -v is optional.
    if len(sys.argv) < 3:
        sys.exit('Usage: %s path logfile [-v]' % sys.argv[0])

    if not os.path.exists(sys.argv[1]):
        sys.exit('ERROR: path %s was not found!' % sys.argv[1])

    try:
        fdlog = open(sys.argv[2], "w")
    except (IOError, OSError):
        # The message needs a %s placeholder; without it the % operator
        # raises TypeError instead of reporting the problem.
        sys.exit('ERROR: unable to open logfile %s' % sys.argv[2])

    # Module-level flag read by verbose(); None means quiet.
    debug = 1 if len(sys.argv) == 4 and sys.argv[3] == "-v" else None

    nfiles = 0
    try:
        for fpath in getfiles(sys.argv[1]):
            verbose("Checking: " + fpath)
            nfiles += 1
            if not checkfimage(fpath):
                log(fpath, fdlog)
                verbose("ERR not an image: " + fpath)
    finally:
        # Close the log even if the scan is interrupted or fails part-way.
        fdlog.close()
    print("Checked %d files." % nfiles)
