commit bbebd1590c9247f606a92d75eff74f304cbc92a4
parent c447e59f5704a3fde4a6d67635235b8afb7e3e42
Author: Vetle Haflan <vetle@haflan.dev>
Date: Fri, 15 Oct 2021 18:54:13 +0200
Merge branch 'master' of gl.haflan.dev:general/snippets
Diffstat:
A | findpresent.py | | | 55 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | organize-media.py | | | 83 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 138 insertions(+), 0 deletions(-)
diff --git a/findpresent.py b/findpresent.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python3
+
+import os
+import sys
+from hashlib import sha256
+
+def get_file_hash(fi):
+    """Return the hex SHA-256 digest of the file at path `fi`.
+
+    The file is read in fixed-size chunks so that large files can be
+    hashed without loading them into memory all at once.
+    """
+    digest = sha256()
+    with open(fi, 'rb') as f:
+        # iter() with a sentinel keeps calling f.read() until it returns b''.
+        for chunk in iter(lambda: f.read(1024 * 1024), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+def get_hash_dict():
+    """Generate hashes for all files below the current directory.
+
+    Returns a dict mapping each SHA-256 hex digest to the name (without
+    its directory) of the first file that was found with that content.
+    Files that cannot be read are reported on stderr and skipped.
+    """
+    hash_to_filename = {}
+    for rootdir, _, files in os.walk('.'):
+        for fi in files:
+            fpath = os.path.join(rootdir, fi)
+            # Handle errors per file, so that one unreadable file does not
+            # cause the remaining files of its directory to be skipped.
+            try:
+                fhash = get_file_hash(fpath)
+            except (OSError, MemoryError) as e:
+                # Report on stderr: stdout carries the '<hash> <name>' lines
+                # of the 'list' command, the format check_hashes() parses.
+                print(e, file=sys.stderr)
+                continue
+            # Keep the first name seen for each distinct content.
+            hash_to_filename.setdefault(fhash, fi)
+    return hash_to_filename
+
+def check_hashes(in_file):
+    """Report which checksums listed in `in_file` are missing below the cwd.
+
+    `in_file` holds one '<hash> <filename>' entry per line; lines without
+    a space are ignored. Each entry whose hash does not belong to any file
+    below the current directory is printed, followed by the number of
+    entries that were found.
+    """
+    hashes_to_find = {}
+    with open(in_file) as f:
+        for line in f:
+            line = line.rstrip('\n')
+            if ' ' not in line:
+                continue
+            # Split on the first space only: file names may contain spaces.
+            fhash, fname = line.split(' ', 1)
+            hashes_to_find[fhash] = fname
+    hashes_found = get_hash_dict()
+    num_found = 0
+    for fhash, fname in hashes_to_find.items():
+        if fhash in hashes_found:
+            num_found += 1
+        else:
+            print(fname, f'not found ({fhash})')
+    print(num_found, 'files found')
+
+# Command line entry point: 'list' prints the hashes of all files below the
+# current directory, 'check' compares them with a previously saved list.
+if len(sys.argv) < 2:
+    print('Use: findpresent.py list | check')
+    sys.exit(1)
+command = sys.argv[1]
+if command == 'list':
+    # One '<hash> <filename>' line per distinct file content.
+    for fhash, fname in get_hash_dict().items():
+        print(fhash, fname)
+elif command == 'check':
+    if len(sys.argv) < 3:
+        print('Use: findpresent.py check <checksum file>')
+        sys.exit(1)
+    check_hashes(sys.argv[2])
+else:
+    # Reject unknown subcommands instead of silently doing nothing.
+    print('Use: findpresent.py list | check')
+    sys.exit(1)
diff --git a/organize-media.py b/organize-media.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python3
+
+# Script that finds all media files in a source directory and moves them to a target
+# directory using the layout <target>/<year>/<month>/ based on the media file date.
+
+from exif import Image
+from datetime import datetime as dt
+import os
+import sys
+import re
+
+# When True, directories are not created and files are not moved; the
+# actions that would have been taken are printed instead.
+DEBUG = False
+
+
+# The source and target directories are taken from the command line when
+# both are given, otherwise from the environment.
+if len(sys.argv) >= 3:
+    NEW_MEDIA_DIR = sys.argv[1]
+    SORTED_MEDIA_DIR = sys.argv[2]
+else:
+    try:
+        NEW_MEDIA_DIR = os.environ['NEW_MEDIA_DIR']
+        SORTED_MEDIA_DIR = os.environ['SORTED_MEDIA_DIR']
+    except KeyError as e:
+        # Explain what is missing instead of dying with a bare traceback.
+        print(f'{e.args[0]} is not set', file=sys.stderr)
+        print('Use: organize-media.py <new media dir> <sorted media dir>', file=sys.stderr)
+        print('or set NEW_MEDIA_DIR and SORTED_MEDIA_DIR in the environment', file=sys.stderr)
+        sys.exit(1)
+
+# strptime format of the EXIF timestamp, e.g. '2021:10:15 18:54:13'.
+ts_fmt_exif = '%Y:%m:%d %H:%M:%S'
+# Patterns for names with an embedded timestamp, e.g. 'IMG_20211015_185413.jpg'.
+# Raw strings keep '\d' from being read as an (invalid) string escape.
+ts_fmts_filename = [
+    re.compile(r'.+_(?P<year>\d{4})(?P<month>\d\d)(?P<day>\d\d)_(?P<hour>\d\d)(?P<min>\d\d)(?P<sec>\d\d).+'),
+]
+
+def extract_img_date(filename):
+    """Return the date of a media file as a (year, month, day) tuple.
+
+    The EXIF data is tried first (skipped for .mp4 files); if that gives
+    no date, the known timestamp patterns are matched against `filename`.
+    All three items are strings, with month and day as two digits.
+    Returns None when no date can be determined.
+    """
+    # Compare the real extension, ignoring case, rather than looking for
+    # '.mp4' anywhere in the path.
+    if os.path.splitext(filename)[1].lower() != '.mp4':
+        try:
+            # The context manager closes the file once the EXIF data is read.
+            with open(filename, 'rb') as f:
+                img = Image(f)
+                ts = dt.strptime(img.datetime, ts_fmt_exif)
+        except Exception:
+            # Best effort: a file without a usable EXIF timestamp simply
+            # falls through to the filename patterns below.
+            pass
+        else:
+            if DEBUG:
+                print('Found date in exif data')
+            return (str(ts.year), f'{ts.month:02d}', f'{ts.day:02d}')
+    # Then try different filename formats
+    for fmt in ts_fmts_filename:
+        match = fmt.match(filename)
+        if match:
+            if DEBUG:
+                print('Found date in filename')
+            return (match.group('year'), match.group('month'), match.group('day'))
+    return None
+
+def create_and_get_month_dir(date):
+    """Return <SORTED_MEDIA_DIR>/<year>/<month> for a (year, month, day) tuple.
+
+    The directory is created when it does not exist yet. With DEBUG
+    enabled nothing is created; the outcome is only printed.
+    """
+    year, month = date[0], date[1]
+    target = os.path.join(SORTED_MEDIA_DIR, year, month)
+    already_there = os.path.exists(target)
+    if DEBUG:
+        if already_there:
+            print(f'Directory "{target}" exists')
+        else:
+            print(f'SIMULATED: Directory "{target}" created')
+    elif not already_there:
+        os.makedirs(target)
+    return target
+
+import shutil
+
+if not os.path.exists(NEW_MEDIA_DIR):
+    print(f'{NEW_MEDIA_DIR} does not exist')
+    sys.exit(1)
+
+# Do the organization: every file for which a date can be determined is
+# moved into the <SORTED_MEDIA_DIR>/<year>/<month> directory of that date.
+for root, _, files in os.walk(NEW_MEDIA_DIR):
+    for filename in files:
+        old_path = os.path.join(root, filename)
+        date = extract_img_date(old_path)
+        if not date:
+            if DEBUG:
+                print('No date found for ' + filename)
+            continue
+        month_dir = create_and_get_month_dir(date)
+        new_path = os.path.join(month_dir, filename)
+        # Never replace an existing file: a plain rename would silently
+        # overwrite a different file that happens to have the same name.
+        if os.path.exists(new_path):
+            print(f'{new_path} already exists, skipping {old_path}', file=sys.stderr)
+            continue
+        if DEBUG:
+            print(f'SIMULATED: Moved {old_path} to {new_path}')
+        else:
+            # Unlike os.rename, shutil.move also works when the source and
+            # the target are on different filesystems.
+            shutil.move(old_path, new_path)