/* $Id: nocem.c,v 1.3 1998/08/04 20:18:57 proff Exp $
 * $Copyright$
 */

#include "nglobal.h"

#include "reg.h"
#include "group.h"
#include "xover.h"
#include "article.h"
#include "history.h"

#include "nocem.h"

/* Shorthand for bigToStr; used below when formatting counter deltas in log messages. */
#define bs bigToStr

/*
 * Compiled regular expressions.  They are filled in by nocemInit()
 * (through nocem_regex_set()) from the corresponding configuration strings.
 */
static regex_t nocemFromPreg;		/* from con->nocemFrom; matched against XHDR From values */
static regex_t nocemTypePreg;		/* from con->nocemType; matched against the NCM "Type:" header */
static regex_t nocemActionPreg;		/* from con->nocemAction; matched against the NCM "Action:" header */
static regex_t nocemPGPgoodPreg;	/* from con->nocemPGPgood; matched against the PGP command's output file */

/*
 * Compile the configured pattern 'pat' into 'preg'
 * (extended syntax, case-insensitive, no sub-match reporting).
 *
 * 'field' is the name of the configuration item the pattern came from and
 * is only used in log messages.
 *
 * Returns TRUE on success.  If the pattern is missing or does not compile,
 * the problem is logged, nocem is switched off (con->nocem = FALSE) and
 * FALSE is returned.
 */
static bool nocem_regex_set(regex_t *preg, char *pat, char *field)
{
    int err_code;
    char *name = field ? field : "(unknown)";

    /*
     * It is the pattern that may be absent from the configuration; the
     * field name is supplied by the caller.  Never hand a NULL pattern
     * to the regex compiler.
     */
    if (!pat || !field)
	{
	    con->nocem = FALSE;
	    logen (("nocem enabled, but '%s' unspecified, nocem disabled", name));
	    return FALSE;
	}
    err_code = nn_regcomp(preg, pat, REG_EXTENDED|REG_NOSUB|REG_ICASE);
    if (err_code != 0)
	{
	    char errbuf[MAX_LINE];
	    regerror(err_code, preg, errbuf, sizeof errbuf);
	    logen (("bad regular expression in '%s' (nocem now disabled): %s", name, errbuf));
	    con->nocem = FALSE;
	    return FALSE;
	}
    return TRUE;
}


/*
 * Prepare the nocem subsystem: compile the From/Type/Action patterns and,
 * when PGP checking is configured, export PGPPATH and compile the
 * "good signature" pattern.
 *
 * Returns FALSE if nocem is not enabled or if any pattern could not be
 * set up (nocem_regex_set() disables nocem in that case), TRUE otherwise.
 */
EXPORT bool nocemInit()
{
    if (!con->nocem)
	return FALSE;

    /* each pattern is only attempted if the previous one succeeded */
    if (!nocem_regex_set(&nocemFromPreg, con->nocemFrom, "nocemFrom"))
	return FALSE;
    if (!nocem_regex_set(&nocemTypePreg, con->nocemType, "nocemType"))
	return FALSE;
    if (!nocem_regex_set(&nocemActionPreg, con->nocemAction, "nocemAction"))
	return FALSE;

    if (!con->nocemPGP)
	return TRUE;

    setenv ("PGPPATH", con->nocemPGPPATH, 1);
    return nocem_regex_set(&nocemPGPgoodPreg, con->nocemPGPgood, "nocemPGPgood");
}

/*
 * Find header 'pat' (searched case-insensitively) in 's' and match its
 * value, up to the terminating '\r', against 'preg'.
 *
 * 'pat' must start with '\n' (e.g. "\nType:"); the leading character is
 * skipped when the header name is printed in the warning.
 *
 * The '\r' is temporarily replaced by a NUL while matching and restored
 * afterwards.  Returns TRUE if the value matches; FALSE if it does not,
 * or if the header or its '\r' terminator is missing (a warning is logged
 * for the latter two).
 */

static bool nocem_regex_match_header(regex_t *preg, char *s, char *pat)
{
    char *value;
    char *cr = NULL;
    bool matched;

    value = strcasestr(s, pat);
    if (value)
	{
	    value += strlen(pat);
	    SKIPWHITE (value);
	    cr = strchr(value, '\r');
	}
    if (!cr)
	{
	    /* header not present, or present without a line terminator */
	    logwn (("nocem message missing '%s' header '%.128s'", pat+1, s));
	    return FALSE;
	}
    *cr = '\0';
    matched = (nn_regexec(preg, value, cr - value, 0, 0, 0) == 0);
    *cr = '\r';
    return matched;
}

/*
 * Record 'msgid' in the history database as SPAM.
 *
 * Returns:
 *   nh_dup - hisGet() already has an entry for this msgid (whether SPAM
 *            or anything else); nothing is changed
 *   nh_err - hisAdd() failed
 *   nh_ok  - the msgid was newly added as SPAM
 *
 * 'st' is not used here; the caller updates the statistics from the
 * return value.
 */
static enum {nh_ok, nh_dup, nh_err} nocem_hide(struct nocem_stats *st, char *msgid)
{
    bool ret;
    char *p;
    p = hisGet(msgid);
    if (p)
	{
	    if (strEq(p, SPAM))
		logn (("<%s> already clasified as '%s'", msgid, p));
	    else
		/* the format has a %s conversion, so the msgid must be passed */
		logn (("<%s> aready classified as a genuine article, can't reclassify as spam", msgid)); /* XXX we can with code */
	    return nh_dup;
	}
    ret = hisAdd(msgid, SPAM);
    if (!ret)
	{
	    logw (("adding <%s> to history failed", msgid));
	    return nh_err;
	}
    logn (("classified <%s> as '%s'", msgid, SPAM));
    return nh_ok;
}

/*
 * Scan 's' for "<...>" tokens and pass each one (without the angle
 * brackets) to nocem_hide(), counting the outcome in 'st'.
 *
 * A candidate is accepted only if at least two characters follow the '<'
 * and the first of '>', '\r', '\n', tab or space to appear is '>'.
 * The closing '>' of every accepted token is overwritten with a NUL and
 * not restored, so the buffer is modified.
 *
 * Always returns TRUE.
 */
static bool nocem_process_msgids(struct nocem_stats *st, char *s)
{
    settaskinfo ("nocem: processing msgid's");
    while (s && *s)
	{
	    int len;
	    int outcome;

	    s = strchr(s, '<');
	    if (!s)
		break;
	    s++;			/* step over '<' */
	    len = strcspn(s, ">\r\n\t ");
	    if (len < 2 || s[len] != '>')
		continue;		/* not a msgid; resume the search after this '<' */
	    s[len] = '\0';
	    outcome = nocem_hide(st, s);
	    if (outcome == nh_ok)
		st->msgid_good++;
	    else if (outcome == nh_dup)
		st->msgid_dup++;
	    else if (outcome == nh_err)
		st->msgid_fail++;
	    s += len + 1;		/* continue after the (now NUL) '>' */
	}
    return TRUE;
}
	    
/*
 * Verify the PGP signature on the 'len' bytes at 'art'.
 *
 * The configured command (con->nocemPGPcommand, a printf-style format
 * that receives the name of a per-process temporary file) is started with
 * popen() and the article text is written to its standard input.  After
 * the command exits successfully, up to sizeof buf bytes of the temporary
 * file are read and matched against nocemPGPgoodPreg.
 *
 * 'artno' is used for log messages only.
 *
 * Returns TRUE and increments st->pgp_good when the pattern matches;
 * otherwise increments st->pgp_fail and returns FALSE.  The temporary
 * file is removed on every path, so the result of one verification can
 * never be picked up by a later one.
 *
 * NOTE(review): the command format string comes from configuration and is
 * passed to snprintf() as-is -- it is trusted to contain a single %s.
 */
static bool pgp_check_art(struct nocem_stats *st, char *art, int len, int artno)
{
    FILE *fp;
    int fd;
    char cmd[MAX_FILE];
    char tmp[MAX_FILE];
    char buf[8192];
    int cc;

    snprintf(tmp, sizeof tmp, "nocem-pgp-tmp.%d", (int) getpid());
    snprintf(cmd, sizeof cmd, con->nocemPGPcommand, tmp);
    /* make sure we can only ever read output produced by this run */
    unlink(tmp);
    settaskinfo ("nocem: verifying PGP signature");
    fp = popen(cmd, "w");
    if (!fp)
	{
	    loge (("popen(\"%s\") failed", cmd));
	    goto bad;
	}
    if (fwrite (art, len, 1, fp) != 1)
	{
	    loge (("error in writing to PGP process '%s'", cmd));
	    pclose (fp);
	    goto bad;
	}
    if (pclose (fp) !=0)
	goto badsig;
    fd = open(tmp, O_RDONLY);
    if (fd == -1)
	{
	    loge (("open(\"%s\") failed", tmp));
	    goto bad;
	}
    cc = read(fd, buf, sizeof buf);
    close (fd);
    if (cc < 1)
	goto bad;
    if (nn_regexec(&nocemPGPgoodPreg, buf, cc, 0, 0, 0) != 0)
	goto badsig;
    unlink(tmp);
    logd (("valid PGP signature for article %d", artno));
    st->pgp_good++;
    return TRUE;

badsig:
    logwn (("unable to verify PGP signature for article %d", artno));
bad:
    st->pgp_fail++;
    unlink(tmp);
    return FALSE;
}
    
/*
 * This is overly hardcoded, but it is fast.
 */

/*
 * Process one fetched article held in 'stack' as a NoCeM notice.
 *
 * The article text is searched for the "@@BEGIN NCM HEADERS",
 * "@@BEGIN NCM BODY" and "@@END NCM BODY" markers (in that order).  When
 * con->nocemPGP is set, the configured PGP begin/end markers must enclose
 * the NCM section and the signature must verify.  The "Type:" and
 * "Action:" NCM headers must then match the configured patterns, after
 * which every message-id in the NCM body is handed to
 * nocem_process_msgids().
 *
 * The article buffer is modified in place (NUL bytes are written into it).
 * 'artno' is used for log messages only.
 *
 * Returns FALSE if a marker is missing, the PGP check fails or a header
 * does not match; otherwise the result of nocem_process_msgids().
 */
static bool nocem_process_art(struct nocem_stats *st, struct strStack *stack, int artno)
{
    char *p;
    bool ret;
    struct nocem_stats st2;
    char *nocem_headers, *nocem_body, *nocem_endbody;
    p = stack->data;
    /* each marker is searched for starting at the previous one, so they must appear in order */
    nocem_headers = strstr(p, "@@BEGIN NCM HEADERS");
    if (!nocem_headers)
	return FALSE;
    nocem_body = strstr(nocem_headers, "@@BEGIN NCM BODY");
    if (!nocem_body)
	return FALSE;
    nocem_endbody = strstr(nocem_body, "@@END NCM BODY");
    if (!nocem_endbody)
	{
	    logwn (("missing '@@END NCM BODY in article %d", artno));
	    return FALSE;
	}
    if (con->nocemPGP)
	{
	    char *start;
	    char *end;
	    start = strstr(stack->data, con->nocemPGPbegin);
	    if (!start)
		{
		    logwn (("missing '%s' in article %d", con->nocemPGPbegin, artno));
		    return FALSE;
		}
	    end = strstr(start, con->nocemPGPend);
	    if (!end)
		{
		    logwn (("missing '%s' in article %d", con->nocemPGPend, artno));
		    return FALSE;
		}
	    /* stop out of window attacks */
	    /* i.e. the signed region [start, end] must contain the whole NCM section */
	    if (start > nocem_headers ||
		end < nocem_endbody)
		{
		    logwn (("PGP message boundary does not correlate with NCM message boundary for article %d", artno));
		    return FALSE;
		}
	    /* NOTE(review): assumes at least two bytes follow the end marker inside the buffer -- not checked here */
	    end += strlen(con->nocemPGPend) + 2;	/* +2 for \r\n -- pgp might need the eol */
	    *end = '\0';
	    if (!pgp_check_art(st, start, end-start, artno))
		return FALSE;
	}
    /*
     * Overwrite the byte just before each marker (presumably the '\n' ending
     * the previous line -- confirm) so that the header section and the body
     * section each become a separate NUL-terminated string.
     */
    nocem_body[-1] = nocem_endbody[-1] = '\0';
    if (!nocem_regex_match_header(&nocemTypePreg, nocem_headers, "\nType:"))
	return FALSE;
    if (!nocem_regex_match_header(&nocemActionPreg, nocem_headers, "\nAction:"))
	return FALSE;
    /* snapshot the counters so the per-article deltas can be logged below */
    st2 = *st;
    ret = nocem_process_msgids(st, nocem_body);
    logd (("article %d yeilded %s new msgsid's and %s duplicates (already classified msgid's)", artno, bs(st->msgid_good - st2.msgid_good), bs(st->msgid_dup - st2.msgid_dup)));
    return ret;
}

/*
 * Walk the XHDR From response held in 'stack' and process every article
 * whose From value matches nocemFromPreg.
 *
 * The first line must carry the status NNTP_HEAD_FOLLOWS_VAL.  Each
 * following line is parsed as "<article number> <from>\r\n"; the line
 * terminators are overwritten with NULs while parsing.  Lines with an
 * article number below 1, or whose From does not match, are counted in
 * st->art_skip.  Matching articles are fetched with CMDarticle() into the
 * global slaveClient stack and handed to nocem_process_art();
 * st->art_good / st->art_fail, st->art_hi and st->bytes_from are updated
 * accordingly.
 *
 * Returns FALSE if the response is malformed (wrong status, missing line
 * terminator), TRUE once the whole list has been walked.
 */
static bool nocem_scan_arts(struct nocem_stats *st, struct strStack *stack)
{
    char *p = stack->data;

    if (strToi (p) != NNTP_HEAD_FOLLOWS_VAL)
	return FALSE;
    p = strchr(p, '\n');
    if (!p)
	return FALSE;
    p++;
    settaskinfo("nocem: scanning article list");
    while (p < stack->data + stack->used)
	{
	    int artno;
	    char *from;
	    char cmd[MAX_CMD];
	    struct strStack *art;
	    struct command cm;

	    artno = strToi (p);
	    SKIPNOWHITE (p);
	    SKIPWHITE (p);
	    from = p;
	    p = strchr(p, '\r');	/* we can rely on this, as we are talking to ourself, */
					/* and WE read the RFC */
	    if (!p)
		return FALSE;
	    *p++ = '\0';
	    if (*p != '\n')
		return FALSE;
	    *p++ = '\0';		/* nuke \n too */
	    if (artno < 1)
		{
		    st->art_skip++;
		    continue;
		}
	    /* p now points past "\r\n", hence the -2 for the length of the From value */
	    if (nn_regexec(&nocemFromPreg, from, p - from - 2, 0, 0, 0)!=0)
		{
		    st->art_skip++;
		    continue;
		}
	    slaveClient = strStackAdd(NULL, "");
	    cm.val = c_article;		/* XXX ick */
	    snprintf(cmd, sizeof cmd, "ARTICLE %d\r\n", artno);
	    settaskinfo("nocem: => [%s] ARTICLE %d", CurrentGroupScfg->host, artno);
	    if (!CMDarticle (&cm, cmd, !con->nocemCache))
		{
		    strStackFree (slaveClient);
		    /*
		     * Do not leave the global pointing at freed memory:
		     * nocem_scan_group() asserts that it is NULL on entry.
		     */
		    slaveClient = NULL;
		    st->art_fail++;
		    continue;
		}
	    st->art_hi = artno;
	    /* take ownership of the fetched article and release the global */
	    art = slaveClient;
	    slaveClient = NULL;
	    st->bytes_from += art->used;
	    if (nocem_process_art(st, art, artno))
		st->art_good++;
	    else
		st->art_fail++;
	    strStackFree (art);
	}
    return TRUE;
}
	    
/*
 * Scan one group for nocem articles.
 *
 * Enters 'group', works out the range of articles not yet scanned from
 * st->art_hi (the highwater mark left by the previous scan), the group's
 * low/high article numbers and con->nocemInitialScan, fetches the From
 * headers for that range with XHDR and hands the list to
 * nocem_scan_arts().  st->art_hi, st->bytes_from and st->last_scan are
 * updated.
 *
 * Returns FALSE if the GROUP or XHDR command fails, TRUE otherwise
 * (including when there is nothing new to scan).  The global slaveClient
 * stack is always released and reset to NULL before returning.
 */

static bool nocem_scan_group(char *group, struct nocem_stats *st)
{
	char cmd[MAX_CMD];
	int hi, lo;
	struct strStack *stack;
	struct nocem_stats st2;
	int start = st->art_hi;

	settaskinfo("nocem: entering group '%s'", group);
	assert (!slaveClient);
	slaveClient = strStackAdd(NULL, "");
	snprintf(cmd, sizeof cmd, "GROUP %.128s\r\n", group);
	if (!CMDgroup(cmd))
	    {
		loge (("group change to '%s' failed", group));
		/* release the capture stack, or the next call trips the assert above */
		strStackFree(slaveClient);
		slaveClient = NULL;
		return FALSE;
	    }
	strStackFree(slaveClient);
	slaveClient = NULL;
	hi = getHi(CurrentGroupNode);
	lo = getLo(CurrentGroupNode);
	if (start > hi)
	    {
		logwn (("highwater article number for '%s' on '%s' decreased, presuming server reset", group, CurrentGroupScfg->host));
		start = 0;
	    }
	if (start < lo)
	    {
		logd (("highwater article number for '%s' adjusted from %d to %d (lowest article in group)", group, start, lo));
		start = lo;
	    }
	/* never look back further than nocemInitialScan articles from the top */
	start = MAX(start, hi - con->nocemInitialScan);
	st->art_hi = start;
	if (hi <= start) /* XXX if a server article renumbering has occurred then this code skips the first article */
	    {
		logd (("no new articles in '%s' on '%s'", group, CurrentGroupScfg->host));
		return TRUE;
	    }
	slaveClient = strStackAdd(NULL, "");
	settaskinfo("nocem: => [%s] XHDR %d-%d (group '%s')", CurrentGroupScfg->host, start, hi, group);
	snprintf(cmd, sizeof cmd, "XHDR From %d-%d\r\n", start, hi);
	if (!CMDxhdr(cmd))
	    {
		loge (("XHDR From %d-%d in '%s' on '%s' failed", start, hi, group, CurrentGroupScfg->host));
		/* same clean-up as for a failed GROUP */
		strStackFree(slaveClient);
		slaveClient = NULL;
		return FALSE;
	    }
	/* take ownership of the XHDR response and release the global */
	stack = slaveClient;
	slaveClient = NULL;
	st->bytes_from += stack->used;
	/* snapshot the counters so the per-group deltas can be logged below */
	st2 = *st;
	nocem_scan_arts(st, stack);
	log (("group '%s' yeilded %s articles, %s new %s msgid's and %s duplicates (already classified msgid's)", group, bs(st->art_good - st2.art_good), bs(st->msgid_good - st2.msgid_good), SPAM, bs(st->msgid_dup - st2.msgid_dup)));
	logd (("group '%s' nocem highwater mark now at %d", group, st->art_hi));
	strStackFree(stack);
	st->last_scan = time (NULL);
	return TRUE;
}

/*
 * Scan every group listed in con->nocemGroups, in list order.  The n-th
 * group uses the n-th slot of Stats->nocem_stats.  The walk stops at the
 * end of the list or at the first entry without data.  Per-group failures
 * are ignored.
 */
static void nocem_scan()
{
	struct strList *node = con->nocemGroups;
	int slot = 0;

	while (node && node->data)
	    {
		nocem_scan_group(node->data, &Stats->nocem_stats[slot]);
		node = node->next;
		slot++;
	    }
}

/* Pid of the running nocem child as recorded by nocemDaemon(); non-zero blocks a new spawn. */
EXPORT int volatile NocemDaemonPid;

/*
 * Start a nocem scanning process if one is due.
 *
 * Nothing is done (FALSE is returned) unless the caller is the master
 * task, nocem is enabled, no nocem child is recorded in NocemDaemonPid and
 * at least con->nocemInterval seconds have passed since the last attempt.
 *
 * SIGCHLD is blocked around make_vm_proc() so that NocemDaemonPid is
 * stored before the signal can be delivered.  A return of -1 from
 * make_vm_proc() is treated as failure (FALSE); a value above 1 is the
 * child's pid as seen by the parent (TRUE).  Any other value is taken to
 * mean we are the new process: it waits for HoldForksNocem to clear, sets
 * the internal client credentials, runs nocem_scan() and retires.
 */
EXPORT bool nocemDaemon()
{
    static time_t last_attempt;
    time_t now = time(NULL);
    sigset_t chld_mask;
    int child = 0;

    if (Task->ti_state != nc_master ||
	!con->nocem || NocemDaemonPid || now - last_attempt < con->nocemInterval)
	return FALSE;
    last_attempt = now;

    sigemptyset(&chld_mask);
    sigaddset(&chld_mask, SIGCHLD);
    sigprocmask (SIG_BLOCK, &chld_mask, NULL);
    child = make_vm_proc(nc_nocem, -1, "nocem");
    if (child == -1 || child > 1)
	{
	    bool spawned = (child > 1) ? TRUE : FALSE;

	    /* record the pid while SIGCHLD is still blocked */
	    if (spawned)
		NocemDaemonPid = child;
	    sigprocmask (SIG_UNBLOCK, &chld_mask, NULL);
	    return spawned;
	}

    /* from here on we are the nocem process */
    sigprocmask (SIG_UNBLOCK, &chld_mask, NULL);
    while (HoldForksNocem) {}
    /* set internal credentials */
    strcpy (ClientHost, "<nocem@nntpcache>");
    strcpy (ClientHostAddr, "127.0.0.1");

    ModeReader = TRUE;

    nocem_scan();
    retire_vm_proc (0);
    NOTREACHED;
}
