Initial implementation, argparse not well done
author stack <stack@inventati.org>
Tue, 29 Jan 2013 01:45:26 +0000 (02:45 +0100)
committer stack <stack@inventati.org>
Tue, 29 Jan 2013 01:45:26 +0000 (02:45 +0100)
idfimage.py [new file with mode: 0755]

diff --git a/idfimage.py b/idfimage.py
new file mode 100755 (executable)
index 0000000..88436e8
--- /dev/null
+++ b/idfimage.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+#Identify false image files, for example a file that has a gif header and a php body
+#author: stack@inventati.org
+#copyleft: 2013-01-29
+
+import os
+import sys
+from PIL import Image
+import imghdr
+
+def log(path, fd):
+    """Write `path` followed by a newline to the file-like object `fd`."""
+    fd.write("".join([path, "\n"]))
+
+def verbose(message):
+    """Print `message` when the module-level `debug` flag exists and is not None."""
+    if globals().get("debug") is not None:  # debug is only set by the script entry point
+        print(message)
+
+def checkfimage(fpath):
+    """Return False when imghdr recognises `fpath` as an image but PIL fails to open it, else True."""
+    if not imghdr.what(fpath):  # header sniffing only: files it does not recognise are not checked
+        return True
+    try:
+        with open(fpath, "rb") as fh:  # own the handle so it is closed after the check
+            Image.open(fh)
+    except Exception:  # not a bare except, so KeyboardInterrupt/SystemExit still propagate
+        return False
+    return True
+
+def getfiles(path):
+    """Yield every file path found under directory `path`, or `path` itself if it is not a directory."""
+    if not os.path.isdir(path):  # use the argument, not sys.argv[1], so callers choose the root
+        yield path
+        return
+    for root, _dirs, files in os.walk(path):
+        for name in files:
+            yield os.path.join(root, name)
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        sys.exit('Usage: %s path logfile [-v]' % sys.argv[0])
+
+    if not os.path.exists(sys.argv[1]):
+        sys.exit('ERROR: path %s was not found!' % sys.argv[1])
+
+    # Module-level flag read by verbose(); None means quiet
+    debug = 1 if len(sys.argv) == 4 and sys.argv[3] == "-v" else None
+    try:
+        fdlog = open(sys.argv[2], "w")
+    except (IOError, OSError):
+        # The message needs a %s placeholder; without it the % operator raises TypeError
+        sys.exit('ERROR: unable to open logfile %s' % sys.argv[2])
+
+    with fdlog:  # close the log even if a check raises
+        for fpath in getfiles(sys.argv[1]):
+            verbose("Checking: " + fpath)
+            if not checkfimage(fpath):
+                log(fpath, fdlog)
+                verbose("ERR not an image: " + fpath)