#!/usr/bin/env python3
"""
Cron-invoked: send the next due outreach email, then exit.

Designed to be invoked frequently (e.g. every 3 minutes during business
hours). Each tick claims at most ONE row from outreach_recipients and sends
it via Gmail SMTP. The throttle (cap, weekday, work-window) is enforced
here, so the cron schedule itself can be loose.

Environment / state:
  - config.json           SMTP + DB creds, site URLs, throttle, kill-switch
  - outreach_recipients   queue table (status='pending', scheduled_at <= NOW)
  - site_settings         live admin controls (pause flag, subject/body templates)
  - send.log              human log
"""
import json
import os
import smtplib
import socket
import ssl
import sys
import time
import traceback
from datetime import datetime, timezone, timedelta
from email.mime.text import MIMEText
from email.utils import formataddr, formatdate, make_msgid

import pymysql

# Paths inside the cron container — the host's outreach dir is bind-mounted
# to /outreach by docker-compose.
# ROOT is the directory holding this script's config and log file.
ROOT     = "/outreach/send"
# JSON configuration loaded by main() at the start of every run.
CONFIG   = f"{ROOT}/config.json"
# Log file that log() appends one timestamped line to per message.
LOG_FILE = f"{ROOT}/send.log"


def log(msg):
    """Emit *msg* with a UTC timestamp to stderr and append it to LOG_FILE.

    Each entry has the form ``[YYYY-MM-DD HH:MM:SS Z-suffixed] message``; the
    file is opened in append mode for every call.
    """
    stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}Z] {msg}"
    print(entry, file=sys.stderr)
    with open(LOG_FILE, "a", encoding="utf-8") as sink:
        sink.write(entry + "\n")


def in_window(cfg, now=None):
    """Decide whether sending is currently allowed by the throttle window.

    Args:
        cfg: Parsed config; reads ``cfg["throttle"]["weekdays_only"]`` and
            ``cfg["throttle"]["send_window_utc"]`` (a ``["HH:MM", "HH:MM"]``
            start/end pair).
        now: Timezone-aware datetime to evaluate. Defaults to the current
            UTC time; mainly useful for testing.

    Returns:
        ``(True, "ok")`` when inside the window, otherwise ``(False, reason)``.

    A window whose end is earlier than its start (e.g. 22:00 → 02:00) is
    treated as wrapping past midnight. The weekday check always uses the
    weekday of *now* itself.
    """
    if now is None:
        now = datetime.now(timezone.utc)
    throttle = cfg["throttle"]
    # weekday(): Monday == 0 … Sunday == 6, so >= 5 means Saturday/Sunday.
    if throttle["weekdays_only"] and now.weekday() >= 5:
        return False, "weekend"

    window = throttle["send_window_utc"]
    start_h, start_m = map(int, window[0].split(":"))
    end_h, end_m = map(int, window[1].split(":"))
    start = now.replace(hour=start_h, minute=start_m, second=0, microsecond=0)
    end = now.replace(hour=end_h, minute=end_m, second=0, microsecond=0)

    if start <= end:
        inside = start <= now <= end
    else:
        # Overnight window: valid from start until midnight, and from
        # midnight until end.
        inside = now >= start or now <= end
    if not inside:
        return False, f"outside window {window}"
    return True, "ok"


def daily_cap_for(cfg, db_cur):
    """Return today's send cap according to the warmup ramp.

    Day 1 → caps[0], day 2 → caps[1], … and every later day → caps[-1],
    where day 1 is the earliest ``DATE(sent_at)`` found in
    outreach_recipients. With no sent rows at all, caps[0] is returned.

    Args:
        cfg: Parsed config; reads ``cfg["throttle"]["warmup_caps"]``.
        db_cur: DB cursor used to look up the first send date.
    """
    caps = cfg["throttle"]["warmup_caps"]
    db_cur.execute(
        "SELECT MIN(DATE(sent_at)) FROM outreach_recipients WHERE sent_at IS NOT NULL"
    )
    first = db_cur.fetchone()[0]
    if not first:
        return caps[0]
    if isinstance(first, datetime):
        # Tolerate a driver handing back a datetime instead of a date.
        first = first.date()
    days_since = (datetime.now(timezone.utc).date() - first).days
    # Clamp at both ends: a first-send date that appears to be in the future
    # (clock skew between this host and the DB) would otherwise yield a
    # negative index and silently select a cap from the END of the ramp.
    index = max(0, min(days_since, len(caps) - 1))
    return caps[index]


def sent_today(db_cur):
    """Count rows whose sent_at falls on the current UTC date.

    The count is based purely on sent_at being set, regardless of the row's
    status column.
    """
    query = (
        "SELECT COUNT(*) FROM outreach_recipients "
        "WHERE sent_at IS NOT NULL AND DATE(sent_at) = UTC_DATE()"
    )
    db_cur.execute(query)
    result = db_cur.fetchone()
    return result[0]


def claim_next(db_cn, db_cur):
    """Claim the oldest due pending row and return it as a dict, or None.

    Selects (with FOR UPDATE) the pending row with the earliest scheduled_at
    that is not in the future, flips it to status='sent' with sent_at set to
    the current UTC timestamp, and commits. 'sent' doubles as a temporary
    lock: callers are expected to move the row back to 'pending', or on to
    'failed'/'bounced', when the send does not succeed.

    NOTE(review): the query itself does not look at any unsubscribe state —
    presumably unsubscribing moves a row out of 'pending'; confirm.
    """
    columns = ("id", "email", "blog_url", "blog_host", "category", "token", "retry_count")
    db_cur.execute(
        f"SELECT {', '.join(columns)} "
        "FROM outreach_recipients "
        "WHERE status = 'pending' AND scheduled_at <= UTC_TIMESTAMP() "
        "ORDER BY scheduled_at ASC LIMIT 1 FOR UPDATE"
    )
    picked = db_cur.fetchone()
    if picked:
        db_cur.execute(
            "UPDATE outreach_recipients SET status = 'sent', sent_at = UTC_TIMESTAMP() "
            "WHERE id = %s",
            (picked[0],),
        )
        db_cn.commit()
        return dict(zip(columns, picked))
    return None


def revert_for_retry(db_cn, db_cur, row_id, error_msg, retry_count):
    """Record a failed send attempt and either requeue or give up on the row.

    With fewer than 3 prior retries the row goes back to 'pending', sent_at
    is cleared and scheduled_at is pushed 30 minutes out so a flaky address
    is not hammered on every tick. Otherwise the row is marked 'failed'.
    Either way last_error gets the first 500 characters of *error_msg*,
    retry_count is stored as ``retry_count + 1``, and the change is committed.

    NOTE(review): the 'failed' branch leaves sent_at untouched, so a row
    that was stamped when claimed keeps counting in sent_today() — confirm
    this is intended.
    """
    params = (error_msg[:500], retry_count + 1, row_id)
    if retry_count < 3:
        statement = (
            "UPDATE outreach_recipients "
            "SET status = 'pending', sent_at = NULL, last_error = %s, "
            "    retry_count = %s, "
            "    scheduled_at = UTC_TIMESTAMP() + INTERVAL 30 MINUTE "
            "WHERE id = %s"
        )
    else:
        statement = (
            "UPDATE outreach_recipients "
            "SET status = 'failed', last_error = %s, retry_count = %s "
            "WHERE id = %s"
        )
    db_cur.execute(statement, params)
    db_cn.commit()


def mark_bounced(db_cn, db_cur, row_id, error_msg):
    """Flag row *row_id* as bounced and commit.

    Sets status='bounced', stamps bounced_at with the current UTC timestamp
    and stores the first 500 characters of *error_msg* in last_error.
    """
    truncated = error_msg[:500]
    statement = (
        "UPDATE outreach_recipients "
        "SET status = 'bounced', bounced_at = UTC_TIMESTAMP(), last_error = %s "
        "WHERE id = %s"
    )
    db_cur.execute(statement, (truncated, row_id))
    db_cn.commit()


# Built-in subject/body templates. main() passes them as the fallback when
# site_settings holds no override, and build_message() falls back to them when
# it is given an empty template. Both are rendered with str.format(), so
# literal braces in a template must be doubled. Placeholders supplied by
# build_message(): {blog_url}, {blog_host}, {token}, {welcome_url},
# {submit_url}, {unsub_url}.
# NOTE(review): "blogsite" in the body may be a typo for "blog site" — confirm
# with the copy owner before changing, since this text is sent to recipients.
DEFAULT_SUBJECT = "A small invite for {blog_host}"
DEFAULT_BODY = (
    "Hi,\n\n"
    "I'm BK, founder of WriteUpCafe. I was putting together a directory of\n"
    "independent blogs and came across {blog_url} — wanted to reach\n"
    "out personally.\n\n"
    "We have a free directory section where bloggers in your space can list\n"
    "their blogsite so our 525K+ writers and readers can discover them.\n"
    "Quick read on what's involved (no signup hoops, no upsell):\n"
    "{welcome_url}\n\n"
    "If this isn't for you, just ignore — I won't follow up.\n\n"
    "— BK\n"
    "bk@writeupcafe.com\n\n"
    "Don't want emails from me: {unsub_url}\n"
)


def load_setting(db_cur, key, default):
    """Fetch setting_value for *key* from site_settings.

    Returns *default* when there is no matching row or the stored value is
    NULL; any other stored value (including an empty string) is returned
    as-is.
    """
    db_cur.execute("SELECT setting_value FROM site_settings WHERE setting_key = %s", (key,))
    record = db_cur.fetchone()
    if not record:
        return default
    stored = record[0]
    if stored is None:
        return default
    return stored


def build_message(cfg, row, subject_tpl=None, body_tpl=None):
    """Render the outreach email for *row* and return it as a MIMEText.

    Args:
        cfg: Parsed config; reads ``cfg['site']`` (base_url, submit_path) and
            ``cfg['smtp']`` (from_name, from_email, reply_to).
        row: Recipient dict; reads token, blog_url, blog_host and email.
        subject_tpl: str.format template for the subject; DEFAULT_SUBJECT is
            used when empty or None.
        body_tpl: str.format template for the body; DEFAULT_BODY is used
            when empty or None.

    Templates may reference {blog_url}, {blog_host}, {token}, {welcome_url},
    {submit_url} and {unsub_url}.
    """
    site = cfg['site']
    token = row['token']
    root = site['base_url']

    # Per-recipient links, all keyed on the row's token.
    welcome_link = root + '/welcome?t=' + token
    submit_link = root + site['submit_path'] + '?t=' + token
    unsub_link = root + '/u/' + token

    placeholders = {
        'blog_url':    row['blog_url'],
        'blog_host':   row['blog_host'],
        'token':       token,
        'welcome_url': welcome_link,
        'submit_url':  submit_link,
        'unsub_url':   unsub_link,
    }
    subject_line = (subject_tpl or DEFAULT_SUBJECT).format(**placeholders)
    text = (body_tpl or DEFAULT_BODY).format(**placeholders)

    smtp = cfg["smtp"]
    message = MIMEText(text, "plain", "utf-8")
    headers = (
        ("Subject", subject_line),
        ("From", formataddr((smtp["from_name"], smtp["from_email"]))),
        ("To", row["email"]),
        ("Reply-To", smtp["reply_to"]),
        ("Date", formatdate(localtime=True)),
        ("Message-ID", make_msgid(domain="writeupcafe.com")),
        # RFC 8058 one-click unsubscribe pair: lets mail clients that honour
        # it offer a native "Unsubscribe" control instead of the recipient
        # reaching for the spam button.
        ("List-Unsubscribe", f"<{unsub_link}>"),
        ("List-Unsubscribe-Post", "List-Unsubscribe=One-Click"),
    )
    for header_name, header_value in headers:
        message[header_name] = header_value
    return message


def send_smtp(cfg, msg):
    """Deliver *msg* over an authenticated STARTTLS SMTP session.

    Args:
        cfg: Parsed config; reads host, port, username, app_password and
            from_email from ``cfg["smtp"]``.
        msg: Message to send; its ``To`` header is used as the single
            envelope recipient.

    Raises:
        smtplib.SMTPException, OSError: on connection, TLS, login or
            delivery failure (including certificate verification errors).
    """
    smtp_cfg = cfg["smtp"]
    # smtplib's starttls() falls back to an UNVERIFIED TLS context when none
    # is given; pass a default context so the server certificate and
    # hostname are checked before the app password goes over the wire.
    tls_context = ssl.create_default_context()
    s = smtplib.SMTP(smtp_cfg["host"], smtp_cfg["port"], timeout=30)
    try:
        s.ehlo()
        s.starttls(context=tls_context)
        s.ehlo()
        s.login(smtp_cfg["username"], smtp_cfg["app_password"])
        s.sendmail(smtp_cfg["from_email"], [msg["To"]], msg.as_string())
    finally:
        try:
            s.quit()
        except Exception:
            # Deliberately best-effort: by now the send has either succeeded
            # or already raised. An error while saying goodbye must not turn
            # a delivered message into a "failed" one (which would cause a
            # re-send) nor mask the original exception.
            pass


def main():
    """Run one cron tick: send at most one due outreach email, then return.

    Checks, in order: the config.json kill-switch, the send window, the
    admin-panel pause flag and the daily cap. Only when all pass is a row
    claimed (which flips it to 'sent') and the email built and sent. Every
    failure after the claim moves the row out of 'sent' again:

      - recipient refused / 5xx in the data phase → 'bounced'
      - other SMTP or network errors              → requeued for retry
      - anything unexpected (e.g. a malformed admin template that makes
        str.format raise) → requeued for retry, then re-raised

    Raises:
        Exception: any unexpected error is logged as FATAL and re-raised.
    """
    # JSON is UTF-8 by specification; don't depend on the container locale.
    with open(CONFIG, encoding="utf-8") as fh:
        cfg = json.load(fh)
    if cfg.get("paused"):
        log("paused via config.json — exiting")
        return

    ok, why = in_window(cfg)
    if not ok:
        log(f"out of window ({why}) — exiting")
        return

    cn = pymysql.connect(
        host=cfg["db"]["host"], port=cfg["db"]["port"],
        user=cfg["db"]["user"], password=cfg["db"]["password"],
        database=cfg["db"]["name"], autocommit=False, charset="utf8mb4",
    )
    cur = cn.cursor()
    try:
        # Live admin controls — read on every tick so changes from
        # /admin/outreach-campaign.php take effect immediately.
        if load_setting(cur, "outreach_paused", "0") == "1":
            log("paused via admin panel — exiting")
            return
        subject_tpl = load_setting(cur, "outreach_subject_template", DEFAULT_SUBJECT)
        body_tpl    = load_setting(cur, "outreach_body_template",    DEFAULT_BODY)

        cap = daily_cap_for(cfg, cur)
        sent = sent_today(cur)
        if sent >= cap:
            log(f"daily cap reached ({sent}/{cap}) — exiting")
            return

        row = claim_next(cn, cur)
        if not row:
            log("no due rows — exiting")
            return

        # From here on the row is already marked 'sent' in the DB, so every
        # failure path below must move it to another status.
        try:
            msg = build_message(cfg, row, subject_tpl, body_tpl)
            send_smtp(cfg, msg)
        except smtplib.SMTPRecipientsRefused as e:
            mark_bounced(cn, cur, row["id"], f"recipient refused: {e}")
            log(f"BOUNCE id={row['id']} {row['email']} — {e}")
        except smtplib.SMTPDataError as e:
            # 5xx codes from sendmail (data phase) — typically permanent
            code = getattr(e, "smtp_code", 0)
            if code and 500 <= code < 600:
                mark_bounced(cn, cur, row["id"], f"smtp 5xx: {e}")
                log(f"BOUNCE id={row['id']} {row['email']} — {e}")
            else:
                revert_for_retry(cn, cur, row["id"], str(e), row["retry_count"])
                log(f"retry id={row['id']} {row['email']} — {e}")
        except (smtplib.SMTPException, OSError) as e:
            revert_for_retry(cn, cur, row["id"], f"{type(e).__name__}: {e}", row["retry_count"])
            log(f"retry id={row['id']} {row['email']} — {type(e).__name__}: {e}")
        except Exception as e:
            # Unexpected failure while building or sending: release the claim
            # so the recipient is not silently recorded as sent, then let the
            # outer handler log the traceback.
            revert_for_retry(cn, cur, row["id"], f"{type(e).__name__}: {e}", row["retry_count"])
            raise
        else:
            # Logged outside the try body on purpose: an OSError from writing
            # the log file after a successful send must not be mistaken for a
            # send failure, which would requeue the row and email it twice.
            log(f"sent id={row['id']} to={row['email']} blog={row['blog_host']} "
                f"({sent + 1}/{cap} today)")
    except Exception:
        log("FATAL " + traceback.format_exc())
        raise
    finally:
        # Close the connection even if closing the cursor fails.
        try:
            cur.close()
        finally:
            cn.close()


# Script entry point: run a single tick when executed directly (not on import).
if __name__ == "__main__":
    main()
