/* vim: set noet ts=4:
 *
 * Copyright (c) 2002-2006 Martin A. Godisch <martin@godisch.de>.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
 * St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <data.h>
#include <freedict.h>
#include <latrine.h>
#include <memory.h>
#include <stdio.h>
#include <time.h>

/* path of the dictionary; handed to the dictionary reader in load_wordlist() */
char *dictfile = NULL;
/* path of the gzip'ed hits file that is read on load and rewritten on save */
char *wordfile = NULL;

/* select_word() draws its random pick from the first randcount list entries */
size_t randcount = DEFAULT_RANDCOUNT;
/* upper bound for the in-memory word list; 0 lets the list grow as needed */
size_t wordlimit = DEFAULT_WORDLIMIT;

/* number of entries delivered by the dictionary reader during the last load */
static size_t dictcount = 0;
/* number of entries currently stored in wordlist */
static size_t wordcount = 0;

/* the in-memory word list (heap array of wordcount entries) */
static struct word *wordlist = NULL;

/* bookkeeping for the rate threshold while loading; presumably the lowest
 * rate that was rejected -- it is only ever written in this file */
static double smallest_out = 0.0;
/* rate threshold a new word has to undercut to enter a full list */
static double largest_in   = 0.0;

/* number of words currently held in the in-memory word list */
size_t get_wordcount(void)
{
	return wordcount;
}

/* number of entries the dictionary reader delivered during the last load */
size_t get_dictcount(void)
{
	return dictcount;
}

static int comp_by_access(const void *a, const void *b)
{
	time_t q1 = ((struct word*)a)->timestamp;
	time_t q2 = ((struct word*)b)->timestamp;
	if (q1 < q2)
		return -1;
	else if (q1 > q2)
		return +1;
	return 0;
}

static int comp_by_rate(const void *a, const void *b)
{
	double q1 = ((struct word*)a)->rate;
	double q2 = ((struct word*)b)->rate;
	if (q1 < q2)
		return -1;
	else if (q1 > q2)
		return +1;
	return comp_by_access(a, b);
}

static int comp_by_pos(const void *a, const void *b)
{
	double q1 = ((struct word*)a)->pos;
	double q2 = ((struct word*)b)->pos;
	if (q1 < q2)
		return -1;
	else if (q1 > q2)
		return +1;
	return 0;
}

/* Compute the rate of a word from the lowest 24 bits of its index and
 * store it in w->rate.
 *
 * Every set bit counts one point; every bit that belongs to the unbroken
 * run of set bits starting at bit 0 counts one additional point.
 * update_word() shifts new answers in at bit 0, so that run is the current
 * streak of hits.
 *
 * returns the value stored in w->rate
 */
static inline double rate(struct word *w)
{
	unsigned long mask;
	double total  = 0.0;
	int    streak = 1;	/* still inside the run of set bits starting at bit 0 */

	assert(w != NULL);
	for (mask = 1; mask <= 0x800000; mask <<= 1) {
		if (!(w->index & mask)) {
			streak = 0;
			continue;
		}
		total += streak ? 2.0 : 1.0;
	}
	w->rate = total;
	return w->rate;
}

/* Record the result of one answer for the given word.
 *
 * The index is shifted left by one and truncated to 24 bits, the new result
 * (1 if hit is non-zero, 0 otherwise) goes into bit 0, the timestamp is set
 * to the current time and the rate is recomputed.
 */
void update_word(struct word *w, int hit)
{
	assert(w != NULL);
	w->index <<= 1;
	w->index &= 0xffffff;
	if (hit)
		w->index |= 1;
	w->timestamp = time(NULL);
	(void)rate(w);
}

/* Pick the next word to ask.
 *
 * The list is re-sorted on every call: three calls in a row by rate
 * (updating largest_in from the last entry), then one call by timestamp.
 * The pick is a random entry among the first randcount entries; randcount is
 * clamped to wordcount first.  As far as randcount allows, the positions of
 * the three most recently returned words are avoided.
 *
 * The list must not be empty (asserted).
 *
 * returns a pointer into the word list
 */
struct word *select_word(void)
{
	static size_t
		recent[3] = {(size_t)(-1), (size_t)(-1), (size_t)(-1)};
	static int
		round     = 0;
	size_t
		pick      = 0;

	if (randcount > wordcount)
		randcount = wordcount;
	assert(randcount > 0);

	if (round >= 3) {
		qsort(wordlist, wordcount, sizeof(struct word), comp_by_access);
		round = 0;
	} else {
		qsort(wordlist, wordcount, sizeof(struct word), comp_by_rate);
		largest_in = wordlist[wordcount-1].rate;
		round++;
	}

	for (;;) {
		pick = random() % randcount;
		/* a recent word is only rejected while enough candidates remain */
		if (randcount > 1 && wordlist[pick].pos == recent[0])
			continue;
		if (randcount > 2 && wordlist[pick].pos == recent[1])
			continue;
		if (randcount > 3 && wordlist[pick].pos == recent[2])
			continue;
		break;
	}

	recent[2] = recent[1];
	recent[1] = recent[0];
	recent[0] = wordlist[pick].pos;
	return &wordlist[pick];
}

/* Release both strings of every list entry and the list itself, then reset
 * wordlist and wordcount to the empty state.
 */
static inline void free_wordlist(void)
{
	size_t left = wordcount;

	while (left-- > 0) {
		FREE(&wordlist[left].lang[0]);
		FREE(&wordlist[left].lang[1]);
	}
	free(wordlist);	/* free(NULL) is a no-op */
	wordlist  = NULL;
	wordcount = 0;
}

/* Scan the hits file line by line for the first line matching HEADER.
 *
 * returns the version found, if it lies within 2..WORDLIST_VERSION
 * returns 0 if the version is out of that range or no header line exists
 *
 * The read position of the file is advanced; callers rewind afterwards.
 */
static inline int check_wordlist_version(gzFile hits)
{
	char line[BUFSIZE];
	int  found;

	assert(hits != NULL);
	while (gzgets(hits, line, BUFSIZE) != Z_NULL) {
		if (sscanf(line, HEADER, &found) != 1)
			continue;
		return (found >= 2 && found <= WORDLIST_VERSION) ? found : 0;
	}
	/* no version marker at all: treated like an unsupported version */
	return 0;
}

/* load the dictionary and the corresponding hits file,
 * an existing wordlist will be overridden
 *
 * With wordlimit == 0 the list grows in steps of WORDSTEP until the whole
 * dictionary fits.  Otherwise at most wordlimit words are kept; once the
 * list is full, a new word only gets in if its rate is below largest_in,
 * and it then replaces the entry whose rate equals largest_in.
 *
 * On success the list is sorted by rate and the random generator is seeded.
 *
 * returns  0: success
 * returns -1: failure (errmsg called); the wordlist is freed, except when
 *             the dictionary could not be opened at all -- then the words
 *             loaded before (cut to wordlimit, if set) are left in place
 */
int load_wordlist(void)
{
	int (*open_dict)(const char*)          = open_freedict;
	int (*read_dict)(gzFile, struct word*) = read_freedict;
	int (*close_dict)(void)                = close_freedict;
	gzFile hits = NULL;
	double q    = 0.0;
	struct word w;
	size_t cursize, i;
	time_t randinit;
	int    ret;

	assert(dictfile != NULL);
	assert(wordfile != NULL);
	/* w.lang[] is passed to FREE() after the loop even if the reader never
	 * filled it in, so it must not start out with indeterminate pointers */
	memset(&w, 0, sizeof(w));
	/* NOTE(review): the fresh-allocation branches rely on MALLOC returning
	 * zeroed memory, since FREE() is applied to unused slots -- confirm */
	if (wordlimit == 0) {
		if (wordlist == NULL)
			wordlist = (struct word*)MALLOC((cursize = WORDSTEP) * sizeof(struct word));
		else
			cursize  = wordcount;
	} else {
		if (wordlist == NULL)
			wordlist = (struct word*)MALLOC(wordlimit * sizeof(struct word));
		else {
			/* when shrinking, release the strings of the entries that are
			 * about to be cut off and keep wordcount within the array */
			for (i = wordlimit; i < wordcount; i++) {
				FREE(&wordlist[i].lang[0]);
				FREE(&wordlist[i].lang[1]);
			}
			if (wordcount > wordlimit)
				wordcount = wordlimit;
			wordlist = (struct word*)REALLOC(wordlist, wordlimit * sizeof(struct word));
			/* memset() takes a byte count, not an element count */
			memset(&wordlist[wordcount], 0, (wordlimit - wordcount) * sizeof(struct word));
		}
		cursize = wordlimit;
	}
	if (open_dict(dictfile) == -1)
		return -1;
	/* errno == 0 after a failed gzopen() is taken as "out of memory", so a
	 * stale value from an earlier call must not be visible here */
	errno = 0;
	if ((hits = gzopen(wordfile, "rb")) == NULL && errno != ENOENT)
		errmsg(_("cannot open wordlist: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
	if (hits != NULL) {
		if (check_wordlist_version(hits))
			gzrewind(hits);
		else {
			errmsg(_("ignoring wordlist because of incompatible version"));
			gzclose(hits);
			hits = NULL;
		}
	}
	for (dictcount = 0, wordcount = 0; (ret = read_dict(hits, &w)) == 1; dictcount++) {
		if (wordcount >= cursize && wordlimit == 0) {
			/* unlimited mode: enlarge the list by another WORDSTEP slots */
			assert(wordcount == cursize);
			cursize += WORDSTEP;
			wordlist = (struct word*)REALLOC(wordlist, cursize * sizeof(struct word));
			memset(&wordlist[wordcount], 0, (cursize - wordcount) * sizeof(struct word));
		}
		q = rate(&w);
		if (wordcount < cursize) {
			/* free slot: take the word, releasing whatever an earlier load
			 * left in this slot */
			if (wordcount == 0 || q > largest_in)
				largest_in = q;
			FREE(&wordlist[wordcount].lang[0]);
			FREE(&wordlist[wordcount].lang[1]);
			wordlist[wordcount++] = w;
			continue;
		}
		if (q < largest_in) {
			/* list is full: replace the entry that carries the threshold rate
			 *
			 * NOTE(review): afterwards largest_in is set to q although
			 * entries with a higher rate may remain in the list -- confirm
			 * that this is the intended selection rule */
			for (i = 0; i < cursize; i++)
				if (rate(&wordlist[i]) == largest_in)
					break;
			assert(i < cursize);
			FREE(&wordlist[i].lang[0]);
			FREE(&wordlist[i].lang[1]);
			wordlist[i]  = w;
			smallest_out = largest_in;
			largest_in   = q;
			continue;
		}
		/* word rejected
		 *
		 * NOTE(review): wordcount == cursize always holds at this point, so
		 * smallest_out is overwritten on every rejection */
		if (wordcount == cursize || q < smallest_out)
			smallest_out = q;
		FREE(&w.lang[0]);
		FREE(&w.lang[1]);
	}
	FREE(&w.lang[0]);
	FREE(&w.lang[1]);
	close_dict();
	if (hits != NULL)
		gzclose(hits);
	/* slots beyond wordcount may still own strings from a previous load;
	 * release them before the array is shrunk or freed */
	for (i = wordcount; i < cursize; i++) {
		FREE(&wordlist[i].lang[0]);
		FREE(&wordlist[i].lang[1]);
	}
	if (ret == -1 || wordcount == 0)
		free_wordlist();
	else {
		if (wordcount < cursize)
			wordlist = (struct word*)REALLOC(wordlist, wordcount * sizeof(struct word));
		time(&randinit);
		srandom(randinit);
		qsort(wordlist, wordcount, sizeof(struct word), comp_by_rate);
	}
	if (ret == 0 && wordcount == 0) {
		errmsg(_("invalid or empty dictionary"));
		ret = -1;
	}
	return ret;
}

/* save the hits of the in-memory words to the hits file
 *
 * The file is written as "<wordfile>.new" and renamed over wordfile once it
 * has been closed successfully.  It holds one "index:timestamp" record per
 * dictionary position: records of words in the list come from memory, all
 * others are copied from the old hits file (if it exists and has a
 * compatible version) or written as zeroes.  The word list is left sorted
 * by position.  Nothing is written when the word list is empty.
 *
 * returns  0: success
 * returns -1: failure (errmsg called)
 */
int save_wordlist(void)
{
	char buffer[BUFSIZE];
	char *tempfile = NULL;
	size_t tempsize;
	gzFile in      = NULL;
	gzFile out     = NULL;
	unsigned long
		index     = 0,
		timestamp = 0;
	size_t i = 0, n = 0;
	int ret;

	if (wordcount == 0)
		return 0;
	tempsize = strlen(wordfile) + sizeof(".new");
	tempfile = (char*)MALLOC(tempsize);
	snprintf(tempfile, tempsize, "%s.new", wordfile);
	/* errno == 0 after a failed gzopen() is taken as "out of memory", so a
	 * stale value from an earlier call must not be visible here */
	errno = 0;
	if ((in = gzopen(wordfile, "rb")) == NULL && errno != ENOENT)
		errmsg(_("cannot open old wordfile: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
	if (in != NULL) {
		if (check_wordlist_version(in))
			gzrewind(in);
		else {
			/* incompatible old file: its records are not carried over */
			gzclose(in);
			in = NULL;
		}
	}
	errno = 0;
	if ((out = gzopen(tempfile, "wb")) == NULL) {
		errmsg(_("cannot open new wordfile: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
		if (in != NULL)
			gzclose(in);
		FREE(&tempfile);
		return -1;
	}
	gzprintf(out, HEADER, WORDLIST_VERSION);
	gzprintf(out, _("# Do not change the first line or this file cannot be read anymore!\n"));
	gzprintf(out, _("# Do not rename this file or it cannot be found anymore!\n"));
	gzprintf(out, _("# Do not edit while LaTrine is running, your changes will be overridden!\n"));
	gzprintf(out, _("# Be careful not to destroy the position-dependent mapping with the dictionary!\n"));
	gzprintf(out, _("# Dictionary: %s\n\n"), dictfile);
	/* records are position-dependent, so walk the list in dictionary order */
	qsort(wordlist, wordcount, sizeof(struct word), comp_by_pos);
	/* first pass: one output record per valid record of the old file */
	while (in != NULL && gzgets(in, buffer, BUFSIZE) != Z_NULL) {
		if (*buffer == '\n' || *buffer == '#' || sscanf(buffer, "%06lx:%08lx", &index, &timestamp) < 2)
			continue;
		if (i < wordcount && wordlist[i].pos == n) {
			gzprintf(out, "%06lx:%08lx\n",
				(unsigned long)wordlist[i].index, (unsigned long)wordlist[i].timestamp);
			i++;
		} else
			gzprintf(out, "%06lx:%08lx\n", index, timestamp);
		n++;
	}
	if (in != NULL)
		gzclose(in);
	/* second pass: positions the old file did not cover */
	for (; n < dictcount; n++)
		if (i < wordcount && wordlist[i].pos == n) {
			gzprintf(out, "%06lx:%08lx\n",
				(unsigned long)wordlist[i].index, (unsigned long)wordlist[i].timestamp);
			i++;
		} else
			gzprintf(out, "%06lx:%08lx\n", 0UL, 0UL);
	if ((ret = gzclose(out)) != Z_OK) {
		/* the handle is gone after gzclose(), so the message has to be
		 * derived from the return code instead of gzerror() */
		errmsg(_("cannot close wordfile: %s"), ret == Z_ERRNO ? strerror(errno) : zError(ret));
		FREE(&tempfile);
		return -1;
	}
	if (rename(tempfile, wordfile) != 0) {
		errmsg(_("cannot update wordfile: %s"), strerror(errno));
		FREE(&tempfile);
		return -1;
	}
	FREE(&tempfile);
	return 0;
}
