#!/usr/bin/python

# $Id: nmdupes,v 1.2 2022/11/20 16:32:43 bscott Exp bscott $

# Find duplicate messages in Notmuch by Message-ID.
#
# Given a Notmuch search query, retrieve the Message-ID for all matches.
# Report on stdout any messages which duplicate a seen Message-ID.
# Each report line lists both file names, separated by tab.
# Problems are reported to stderr.

from notmuch import Query, Database
import sys

def build_query(args):
    """Return the notmuch query string built from command-line arguments.

    The arguments are joined with single spaces, so a query may be given
    on the command line either as one quoted string or as several words.
    """
    return " ".join(args)


def duplicate_pairs(filenames):
    """Return (first, other) file-name pairs for one message.

    The same message stored in more than one file is by definition a
    dupe.  The first file name is half of the dupe-pair for every other
    name, so N file names yield N - 1 pairs.  Zero or one file name
    yields an empty list.

    filenames may be any iterable (including a generator); it is
    consumed exactly once.
    """
    names = list(filenames)
    # names[0] is only evaluated when names[1:] is non-empty, so an empty
    # input is safe here.
    return [(names[0], other) for other in names[1:]]


def main(argv):
    """Report duplicate files for every message matching the query.

    argv is the full argument vector (program name first).  Each dupe
    pair is written to stdout as two file names separated by a tab; the
    summary and any problems go to stderr.

    Returns the process exit status: 1 when no query was given,
    otherwise 0.
    """
    if len(argv) < 2:
        print("missing arguments: notmuch query", file=sys.stderr)
        return 1

    # NOTE(review): a path of None presumably selects the default
    # database from the user's notmuch configuration -- confirm against
    # the bindings documentation.
    db = Database(None)
    msgs = Query(db, build_query(argv[1:])).search_messages()

    dupect = msgct = 0

    for msg in msgs:
        msgct += 1
        for first, other in duplicate_pairs(msg.get_filenames()):
            print(f"{first}\t{other}")
            dupect += 1

    print(f"{msgct} messages, {dupect} duplicates", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
