#include <string.h>
#include <ctype.h>
#include <fnmatch.h>

/* this line is needed for FreeBSD's regex.h. sigh */
#include <sys/types.h>

#include <regex.h>
#include "Python.h"
#include "mmapmodule.h"

#include "metaphone.h"
#include "levenshtein.h"
#include "soundex.h"

/* #include <unicode/ustring.h> */

/* maximum possible size for entry */
#define MAXWORDLEN 255

/* Length of the search key most recently prepared by pre_exact_strat()
   or pre_prefix_strat(); prefix_strat() uses it as its comparison
   length. */
static unsigned short int word_len;

/*
 * Prepare the search key for the "exact" strategy: the word followed by
 * a single '\t'.
 *
 * The key is built in a static buffer (overwritten by the next call) and
 * its length, including the tab, is stored in the file-scope word_len.
 * Words that do not fit are truncated so that the tab and the
 * terminating NUL always stay inside the buffer.
 *
 * Returns a pointer to the static buffer.
 */
char *pre_exact_strat(w)
char *w;
{
    static char word[MAXWORDLEN + 1];
    size_t len = strlen(w);

    /* leave room for the '\t' and the terminating NUL */
    if (len > MAXWORDLEN - 1)
        len = MAXWORDLEN - 1;
    memcpy(word, w, len);
    word[len] = '\t';
    word[len + 1] = '\0';
    word_len = (unsigned short int) (len + 1);
    return word;
}

/*
 * Prepare the search key for the "prefix" strategy.
 *
 * The word itself serves as the key; only its length is recorded in the
 * file-scope word_len so that prefix_strat() knows how many characters
 * to compare.  Returns w unchanged.
 */
char *pre_prefix_strat(w)
char *w;
{
    word_len = (unsigned short int) strlen(w);
    return w;
}

/*
 * "Exact" strategy: returns 1 when entry s and key w are identical
 * strings, 0 otherwise.
 */
unsigned short int exact_strat(s, w)
char *s;
char *w;
{
    if (strcmp(s, w) == 0)
        return 1;
    return 0;
}

/*
 * "Prefix" strategy: returns 1 when the first word_len characters of
 * entry s equal those of key w, 0 otherwise.  word_len is the
 * file-scope length recorded by the pre-strategy functions.
 */
unsigned short int prefix_strat(s, w)
char *s;
char *w;
{
    if (strncmp(s, w, word_len) == 0)
        return 1;
    return 0;
}

/*
 * "Substring" strategy: returns 1 when key w occurs anywhere inside
 * entry s, 0 otherwise.
 */
unsigned short int substring_strat(s, w)
char *s;
char *w;
{
    char *hit = strstr(s, w);

    return (unsigned short int) (hit ? 1 : 0);
}

/*
 * "Suffix" strategy: returns 1 when entry s ends with key w, 0
 * otherwise.  An empty key matches every entry.
 *
 * The comparison is done on lengths plus memcmp(), so no pointer is
 * ever moved in front of the start of s (which would be undefined
 * behaviour, notably for an empty s).
 */
unsigned short int suffix_strat(s, w)
char *s;
char *w;
{
    size_t slen = strlen(s);
    size_t wlen = strlen(w);

    /* a key longer than the entry can never be its suffix */
    if (wlen > slen)
        return 0;
    return (unsigned short int) (memcmp(s + (slen - wlen), w, wlen) == 0);
}


/* Length passed to soundex_hash() and number of hash characters that
   soundex_strat() compares. */
#define SOUNDEX_LEN 8

/*
 * Prepare the search key for the "soundex" strategy: hashes w with
 * soundex_hash() into a static buffer (overwritten by the next call)
 * and returns that buffer.
 */
char *pre_soundex_strat(w)
char *w;
{
    static char query_hash[SOUNDEX_LEN + 1];

    soundex_hash(w, query_hash, SOUNDEX_LEN);
    return query_hash;
}

/*
 * "Soundex" strategy: hashes entry s with soundex_hash() and returns 1
 * when the first SOUNDEX_LEN characters of that hash equal those of w,
 * 0 otherwise.  w is expected to be a hash already (see
 * pre_soundex_strat()).
 */
unsigned short int soundex_strat(s, w)
char *s;
char *w;
{
    char entry_hash[SOUNDEX_LEN + 1];

    soundex_hash(s, entry_hash, SOUNDEX_LEN);
    if (strncmp(entry_hash, w, SOUNDEX_LEN) == 0)
        return 1;
    return 0;
}

/*
 * "Levenshtein" strategy: returns 1 when fastest_levdist(s, w) reports
 * a distance of at most 1, 0 otherwise.
 */
unsigned short int lev_strat(s, w)
char *s;
char *w;
{
    if (fastest_levdist(s, w) <= 1)
        return 1;
    return 0;
}

/*
 * Prepare the search key for the "regexp" strategy: compiles w as a
 * POSIX regular expression (REG_NOSUB) into a static regex_t.
 *
 * The pattern compiled by the previous call is released with regfree()
 * first, so repeated searches do not leak.
 *
 * Returns the compiled pattern disguised as a char * (the common key
 * type of all strategies), or NULL when w is not a valid expression.
 */
char *pre_regexp_strat(w)
char *w;
{
    static regex_t rx;
    static int rx_compiled = 0;	/* non-zero while rx holds a pattern */

    if (rx_compiled) {
        regfree(&rx);
        rx_compiled = 0;
    }
    /* after a failed regcomp() the contents of rx are undefined and
       must not be handed to regexec() */
    if (regcomp(&rx, w, REG_NOSUB) != 0)
        return NULL;
    rx_compiled = 1;
    return (char *) &rx;
}

/*
 * "Regexp" strategy: returns 1 when entry s matches the compiled
 * pattern rx (as returned by pre_regexp_strat()), 0 otherwise.  A NULL
 * rx, i.e. a pattern that failed to compile, matches nothing.
 */
unsigned short int regexp_strat(s, rx)
char *rx, *s;
{
    if (rx == NULL)
        return 0;
    return (unsigned short int) (regexec((regex_t *) rx, s, 0, NULL, 0) == 0);
}

/* If this platform's fnmatch does not know FNM_CASEFOLD we lose case
   insensitivity.  So what - it does not work for unicode strings
   anyway. */
#if !defined(FNM_CASEFOLD)
# define FNM_CASEFOLD 0
#endif

/*
 * "fnmatch" strategy: returns 1 when entry s matches the shell-style
 * pattern w according to fnmatch() with FNM_CASEFOLD, 0 otherwise.
 */
unsigned short int fnmatch_strat(s, w)
char *s;
char *w;
{
    if (fnmatch(w, s, FNM_CASEFOLD) == 0)
        return 1;
    return 0;
}

/*
 * Prepare the search key for the "metaphone" strategy: calls
 * metaphone(w, buffer, 1) on a static buffer (overwritten by the next
 * call) and returns that buffer.
 * NOTE(review): the third argument presumably selects "generate the
 * code" mode - confirm against metaphone.h.
 */
char *pre_metaphone_strat(w)
char *w;
{
    static char query_code[MAXMETAPH + 1];

    metaphone(w, query_code, 1);
    return query_code;
}

/*
 * "Metaphone" strategy: returns 1 when metaphone(s, m, 0) returns 0,
 * and 0 otherwise.  m is the key produced by pre_metaphone_strat().
 * NOTE(review): with a third argument of 0 metaphone() presumably
 * compares s against the code m - confirm against metaphone.h.
 */
unsigned short int metaphone_strat(s, m)
char *s;
char *m;
{
    if (metaphone(s, m, 0) == 0)
        return 1;
    return 0;
}


/*
 * Match predicates, indexed by the numeric strategy passed to
 * loop_in_C().  Each is called as predicate(entry, key) and returns
 * non-zero on a match.
 */
static unsigned short int (*strategies[]) () = {
    exact_strat,		/* 0 */
    prefix_strat,		/* 1 */
    substring_strat,		/* 2 */
    suffix_strat,		/* 3 */
    soundex_strat,		/* 4 */
    lev_strat,			/* 5 */
    regexp_strat,		/* 6 */
    fnmatch_strat,		/* 7 */
    metaphone_strat		/* 8 */
};

/*
 * Key preparation functions, parallel to strategies[].  A NULL slot
 * means the search word is used as the key unchanged.
 */
static char *(*pre_strategies[]) () = {
    pre_exact_strat,		/* 0 */
    pre_prefix_strat,		/* 1 */
    NULL,			/* 2 */
    NULL,			/* 3 */
    pre_soundex_strat,		/* 4 */
    NULL,			/* 5 */
    pre_regexp_strat,		/* 6 */
    NULL,			/* 7 */
    pre_metaphone_strat		/* 8 */
};



/*
 * Copy the headword of index entry i into string.
 *
 * positions[i] gives the byte offset of the entry inside the data of
 * mmapobject; the headword is everything from that offset up to and
 * including the first '\t'.  At most MAXWORDLEN bytes are copied.
 *
 *   string      output buffer; must have room for MAXWORDLEN + 1 bytes
 *               (MAXWORDLEN copied bytes plus the terminating NUL when
 *               the tab is kept)
 *   i           index into positions
 *   mmapobject  mmap object holding the index file
 *   positions   Python list of entry offsets
 *   tab         non-zero: keep the trailing '\t'; zero: strip it
 *
 * On any failure (position lookup fails, or no '\t' within MAXWORDLEN
 * bytes) a warning is printed and string is set to the empty string,
 * so callers always receive a NUL-terminated buffer.
 * NOTE(review): a Python exception raised by a failed lookup is left
 * pending, as before; this function has no way to report it.
 * NOTE(review): the copy is not bounded by the size of the mapping;
 * an offset near the end of a corrupted file can still read past it.
 */
void getentry(string, i, mmapobject, positions, tab)
char *string;
unsigned int i;
PyObject *mmapobject;
PyObject *positions;
unsigned short int tab;		/* 1 if we want trailing '\t' as well */
{
    PyObject *item;
    long pos;
    void *entry;
    char *end;

    /* PyList_GetItem() yields NULL for a bad index and PyInt_AsLong()
       yields -1 on error; a valid offset is never negative */
    item = PyList_GetItem(positions, i);
    pos = (item != NULL) ? PyInt_AsLong(item) : -1;
    if (pos < 0) {
        string[0] = '\0';
        printf("Warning: Corrupted index file \n");
        return;
    }

    entry = (void *) (((mmap_object *) mmapobject)->data + pos);
    /* memccpy() returns a pointer just past the copied '\t', or NULL
       when no '\t' was found within MAXWORDLEN bytes */
    end = memccpy((void *) string, entry, '\t', MAXWORDLEN);
    if (end == NULL) {
        string[0] = '\0';
        printf("Warning: Corrupted index file \n");
        return;
    }

    /* string is now the entry word of the index file */
    if (tab)
        *end = '\0';		/* terminate after the tab */
    else
        *(end - 1) = '\0';	/* overwrite the tab */
}

/*
 * Binary search for word among the index entries.
 *
 * Entries are fetched with getentry() (trailing '\t' kept) and their
 * first word_len characters are compared with word, so the entries
 * must be sorted in strncmp() order.
 *
 *   word        search key
 *   word_len    number of characters of the key to compare
 *   entries     number of entries in positions
 *   mmapobject  mmap object holding the index file
 *   positions   Python list of entry offsets
 *
 * Returns the index of a matching entry (>= 0) if one is found.
 * Otherwise returns -low - 1 (always negative), where low is the lower
 * bound at which the search stopped; callers use the absolute value as
 * a starting point next to where the word would be.  Returns -1 for an
 * empty index.
 */
int findentry(word, word_len, entries, mmapobject, positions)
const char *word;
size_t word_len;
unsigned int entries;
PyObject *mmapobject;
PyObject *positions;
{
    /* getentry() writes up to MAXWORDLEN bytes plus a terminating NUL */
    char string[MAXWORDLEN + 1];
    unsigned int low, high, middle;
    int c;

    /* entries - 1 would wrap around for an empty index */
    if (entries == 0)
        return -1;

    low = 0;
    high = entries - 1;
    for (;;) {
        middle = low + (high - low) / 2;
        if (middle == low)
            break;
        getentry(string, middle, mmapobject, positions, 1);
        c = strncmp(string, word, word_len);
        if (c == 0)
            return (int) middle;
        if (c < 0)
            low = middle;
        else
            high = middle;
    }

    /* low and high are now equal or adjacent.  A bound that still has
       its initial value (first or last entry) was never compared in
       the loop above, so check both before giving up. */
    getentry(string, low, mmapobject, positions, 1);
    if (strncmp(string, word, word_len) == 0)
        return (int) low;
    if (high != low) {
        getentry(string, high, mmapobject, positions, 1);
        if (strncmp(string, word, word_len) == 0)
            return (int) high;
    }
    return -(int) low - 1;
}


static PyObject *loop_in_C(self, args)
PyObject *self;
PyObject *args;
{
    PyObject *mmapobject;
    PyObject *positions;
    char *word;

    PyObject *rlist;
    PyObject *val;


    unsigned short int strategy;
    unsigned short int go_through_all;
    unsigned short int tab;
    unsigned int maxnr;

    int indexlen;
    int i, low_index, high_index;
    int guess;
    char *newword;
    unsigned int count;
    const char string[256];

    if (!PyArg_ParseTuple
	(args, "OOshi", &mmapobject, &positions, &word, &strategy, &maxnr))
	return NULL;
    indexlen = PyList_Size(positions);

    rlist = PyList_New(0);
    if (!rlist) {		/* error */
	PyErr_SetString(PyExc_MemoryError, "error allocating list");
	return NULL;
    };
    count = 0;
    if (pre_strategies[strategy] != NULL)
	newword = pre_strategies[strategy] (word);
    else
	newword = word;

    if (strategy == 0)
	tab = 1;
    else
	tab = 0;
    if (strategy == 0)
	guess =
	    abs(findentry
		(newword, strlen(newword), indexlen, mmapobject, positions,
		 tab));
    else
	guess =
	    abs(findentry
		(word, strlen(word), indexlen, mmapobject, positions,
		 tab));

    low_index = guess;
    high_index = guess + 1;
    if (strategy == 0 || strategy == 1)
	go_through_all = 0;
    else
	go_through_all = 1;
    /* if maxnr is zero, return everything */
    while (((!maxnr) || count < maxnr) && (low_index >= 0 || high_index < indexlen)) {
	if (low_index >= 0) {
	    i = low_index;
	    low_index--;
	    getentry(&string, i, mmapobject, positions, tab);
	    if (strategies[strategy] (string, newword)) {
		val = Py_BuildValue("i", i);
		PyList_Append(rlist, val);
        	    count++;
	    } else if (!go_through_all) {
		low_index = -1;
	    }
	}

	if (high_index < indexlen) {
	    i = high_index;
	    high_index++;
	    getentry(&string, i, mmapobject, positions, tab);
	    if (strategies[strategy] (string, newword)) {
		val = Py_BuildValue("i", i);
		PyList_Append(rlist, val);
		count++;
	    } else if (!go_through_all) {
		high_index = indexlen;
	    }
	}
    }


    return rlist;
}


/* Method table of the "searching" module: exposes loop_in_C() as its
   only function, taking positional arguments. */
static PyMethodDef SearchMethods[] = {
    {"loop_in_C", loop_in_C, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL}	/* Sentinel */
};


/* Module initialisation: registers the "searching" module with the
   methods listed in SearchMethods. */
void initsearching()
{
    Py_InitModule("searching", SearchMethods);
}
