#!/usr/bin/python3
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
#
# Meant to be run by the `rtmailarchive` user.

import email.parser
import psycopg2
import os
import os.path
from datetime import datetime, timedelta

DEBUG = True

MAILDIR_ROOT = os.path.join(os.environ['HOME'], 'Maildir')

KEEP_FOR_MAX_DAYS = 30

RT_CONNINFO = "dbname=rtdb"

SELECT_TICKET_QUERY = """
    PREPARE HasTicket (text) AS
    SELECT DISTINCT Tickets.Id
      FROM                 Tickets
                INNER JOIN Transactions
                        ON    (Transactions.ObjectType = 'RT::Ticket'
                           AND Transactions.ObjectId = Tickets.Id)
           LEFT OUTER JOIN Attachments
                        ON Attachments.TransactionId = Transactions.Id
     WHERE Attachments.MessageId = $1
"""

SELECT_HAM_TICKET_QUERY = """
    PREPARE IsHam (text) AS
    SELECT DISTINCT Tickets.Id
      FROM                 Tickets
                INNER JOIN Transactions
                        ON    (Transactions.ObjectType = 'RT::Ticket'
                           AND Transactions.ObjectId = Tickets.Id)
           LEFT OUTER JOIN Attachments
                        ON Attachments.TransactionId = Transactions.Id
     WHERE Attachments.MessageId = $1
       AND Tickets.Status NOT IN ('rejected', 'deleted')
"""


# The weird “E'BQoCAAAAAQiA\n'” below is the value stored when only
# uid 0 has reported the ticket as spam. This is what happens when
# SpamAssassin already had detected the message as spam.

SELECT_SPAM_TICKET_QUERY = """
    PREPARE IsSpam (text) AS
    SELECT DISTINCT Tickets.Id
      FROM                 Tickets
                INNER JOIN Transactions
                        ON    (Transactions.ObjectType = 'RT::Ticket'
                           AND Transactions.ObjectId = Tickets.Id)
           LEFT OUTER JOIN Attachments ON Attachments.TransactionId = Transactions.Id
           LEFT OUTER JOIN Attributes
                        ON    (Attributes.Name = 'SpamReports'
                           AND Attributes.ObjectType = 'RT::Ticket'
                           AND Attributes.ObjectId = Tickets.Id)
     WHERE Attachments.MessageId = $1
       AND Attributes.Content != E'BQoCAAAAAQiA\\n'
"""

RENAME_SPAMREPORTS_QUERY = """
   PREPARE RenameSpamReports (text) AS
    UPDATE Attributes
       SET Name = 'OldSpamReports'
      FROM                 Tickets
                INNER JOIN Transactions
                        ON    (Transactions.ObjectType = 'RT::Ticket'
                           AND Transactions.ObjectId = Tickets.Id)
           LEFT OUTER JOIN Attachments
                        ON Attachments.TransactionId = Transactions.Id
     WHERE Attributes.Name = 'SpamReports'
       AND Attributes.ObjectType = 'RT::Ticket'
       AND Attributes.ObjectId = Tickets.Id
       AND Attachments.MessageId = $1
"""

EMAIL_PARSER = email.parser.Parser()

if DEBUG:
    def log(msg):
        print(msg)
else:
    def log(msg):
        pass

def execute_and_fetchone(statement, msg_id):
    global cur

    cur.execute("EXECUTE {} (%s)".format(statement), (msg_id,))
    return cur.fetchone() is not None

def rename_spamreports(msg_id):
    global cur

    cur.execute("EXECUTE RenameSpamReports (%s)", (msg_id,))
    log('{}: renamed spam reports {}'.format(msg_id, cur.rowcount))
    if cur.rowcount != 1:
        raise ValueError('Unable to rename previous SpamReports attrbute.')

def has_ticket(msg_id):
    return execute_and_fetchone('HasTicket', msg_id)

def is_ham(msg_id):
    return execute_and_fetchone('IsHam', msg_id)

def is_spam(msg_id):
    return execute_and_fetchone('IsSpam', msg_id)

def move_message(learntype, path):
    dest = '{}/.{}-tolearn/cur/{}'.format(MAILDIR_ROOT, learntype, os.path.basename(path))
    os.rename(path, dest)
    # ensure the message can be read later by debian-spamd
    os.chmod(dest, 0o640)

def handle_message(path):
    msg = EMAIL_PARSER.parse(open(path), headersonly=True)
    msg_id = msg['Message-Id']
    if msg_id is None or not msg_id.startswith('<') or not msg_id.endswith('>'):
        log("{}: bad Message-Id, removing.".format(path))
        os.unlink(path)
        return
    msg_id = msg_id[1:-1] # remove '<' and '>'
    if not has_ticket(msg_id):
        log("{}: does not exist in database, removing.".format(path))
        os.unlink(path)
        return
    if is_ham(msg_id):
        log("{}: identified as ham.".format(path))
        move_message('ham', path)
        rename_spamreports(msg_id)
        return
    if is_spam(msg_id):
        log("{}: identified as spam.".format(path))
        move_message('spam', path)
        return
    mtime = datetime.fromtimestamp(os.stat(path).st_mtime)
    limit = datetime.now() - timedelta(days=KEEP_FOR_MAX_DAYS)
    if mtime <= limit:
        log("{}: too old, removing.".format(path))
        os.unlink(path)
        return
    # well, it's not identified ham, not identified spam, and not too old
    log("{}: unknown, keeping for {} more days.".format(path, (mtime - limit).days))

def scan_directory(dir_path):
    for filename in os.listdir(dir_path):
        path = os.path.join(dir_path, filename)
        handle_message(path)

con = None
cur = None

if __name__ == '__main__':
    try:
        con = psycopg2.connect(RT_CONNINFO)
        con.set_session(autocommit=True)
        cur = con.cursor()
        # load prepared statements
        cur.execute(SELECT_TICKET_QUERY)
        cur.execute(SELECT_HAM_TICKET_QUERY)
        cur.execute(SELECT_SPAM_TICKET_QUERY)
        cur.execute(RENAME_SPAMREPORTS_QUERY)
        # look at messages
        for subdir in ('new', 'cur'):
            scan_directory(os.path.join(MAILDIR_ROOT, subdir))
    finally:
        if cur:
            cur.close()
        if con:
            con.close()
