/* Ruby extension module to use SUFARY */
  
/* Copyright (C) 1999  Takao KAWAMURA <kawamura@debian.or.jp> */
  
/*  Author: Takao Kawamura <kawamura@debian.or.jp> */
/*  Created: 9 Feb 1999 */
/*  Version: $Id: sufary.c,v 1.2 1999/09/06 08:47:07 kawamura Exp $ */

/* This program is free software; you can redistribute it and/or modify */
/* it under the terms of the GNU General Public License as published by */
/* the Free Software Foundation; either version 2 of the License, or */
/* (at your option) any later version. */
  
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the */
/* GNU General Public License for more details. */
  
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include "ruby.h"
#include "sufary.h"

/*
 * Per-object state wrapped inside a Ruby Sufary instance.
 */
typedef struct {
    int num;        /* size of the current match range (right - left + 1); 0 when no search matched */
    SUFARY *ary;    /* handle returned by sa_openfiles(); set to NULL once the object is closed */
} rb_sufary;

/*
 * Per-object state wrapped inside a Ruby Did instance.
 */
typedef struct {
    DID *did;       /* handle returned by sa_opendid(); set to NULL once the object is closed */
} rb_did;

/* Ruby class objects for "Sufary" and "Did", assigned in Init_sufary(). */
static VALUE cSUFARY, cDID;

/*
 * Shared scratch pointer holding the most recent C string obtained from
 * sa_getline()/sa_getstr()/sa_getblock().  Callers free() the previous
 * value before storing a new one, so it is process-wide state shared by
 * every Sufary object.
 */
static char *buf = NULL;

/*
 * Fetch the rb_sufary wrapped by `obj' into `sa' and call closed_ary()
 * when its SUFARY handle has already been released.
 *
 * Wrapped in do { } while (0) so that `GetSUFARY(obj, sa);' behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define GetSUFARY(obj, sa) do {\
    Data_Get_Struct(obj, rb_sufary, sa);\
    if ((sa)->ary == NULL) closed_ary();\
} while (0)

/*
 * Fetch the rb_did wrapped by `obj' into `did' and call closed_did()
 * when its DID handle has already been released.
 *
 * Wrapped in do { } while (0) so that `GetDID(obj, did);' behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define GetDID(obj, did) do {\
    Data_Get_Struct(obj, rb_did, did);\
    if ((did)->did == NULL) closed_did();\
} while (0)

/* Report use of a Sufary object whose array file has been closed. */
static void
closed_ary(void)
{
    rb_fatal("closed array file");
}

/* Report use of a Did object whose did file has been closed. */
static void
closed_did(void)
{
    rb_fatal("closed did file");
}

/*
 * Free callback registered with Data_Make_Struct() for Sufary objects:
 * closes the SUFARY handle if it is still open, then releases the wrapper.
 */
static void free_sufary(rb_sufary *sa)
{
    SUFARY *handle = sa->ary;

    if (handle != NULL) {
        sa_closefiles(handle);
    }
    free(sa);
}

/*
 * Free callback registered with Data_Make_Struct() for Did objects:
 * closes the DID handle if it is still open, then releases the wrapper.
 */
static void free_did(rb_did *did)
{
    DID *handle = did->did;

    if (handle != NULL) {
        sa_closedid(handle);
    }
    free(did);
}

/*
 * Sufary#init
 *
 * Calls sa_reset() on the underlying SUFARY handle.  Always returns nil.
 */
static VALUE fsufary_init(VALUE obj)
{
    rb_sufary *self;

    GetSUFARY(obj, self);

    sa_reset(self->ary);

    return Qnil;
}

/*
 * Sufary#search0(str)
 *
 * Runs sa_sel() with `str' on the handle without resetting it first.
 * On success stores the size of the [left, right] range in the wrapper
 * and returns it as a Fixnum; on FAIL stores 0 and returns nil.
 */
static VALUE fsufary_add_search(VALUE obj, VALUE str)
{
    rb_sufary *self;

    Check_SafeStr(str);
    GetSUFARY(obj, self);

    if (sa_sel(self->ary, RSTRING(str)->ptr) != FAIL) {
        self->num = self->ary->right - self->ary->left + 1;
        return INT2FIX(self->num);
    }

    self->num = 0;
    return Qnil;
}

/*
 * Sufary#search(str)
 *
 * Resets the handle via fsufary_init() and then performs the same search
 * as Sufary#search0, returning its result.
 */
static VALUE fsufary_search(VALUE obj, VALUE str)
{
    VALUE hits;

    (void)fsufary_init(obj);
    hits = fsufary_add_search(obj, str);
    return hits;
}


/*
 * Sufary.open(text [, array]) / Sufary.new(text [, array])
 *
 * Opens the text file and, when given, the named array file through
 * sa_openfiles() (which receives NULL for the array name when only one
 * argument is supplied).  Returns the new Sufary object with an empty
 * match range; calls rb_sys_fail() when sa_openfiles() returns NULL.
 *
 * Both arguments are validated with Check_SafeStr() BEFORE their string
 * pointers are read, so a non-String argument is rejected instead of
 * being dereferenced as an RString.
 */
static VALUE fsufary_s_open(int ac, VALUE *av, VALUE class)
{
    rb_sufary *sa;
    VALUE obj, text, ary;
    char *array_name;
    int have_ary;

    have_ary = (rb_scan_args(ac, av, "11", &text, &ary) != 1);
    if (have_ary)
        Check_SafeStr(ary);
    Check_SafeStr(text);

    /* Pointers are fetched only after every check has passed. */
    array_name = have_ary ? RSTRING(ary)->ptr : NULL;

    obj = Data_Make_Struct(class, rb_sufary, 0, free_sufary, sa);
    if ((sa->ary = sa_openfiles(RSTRING(text)->ptr, array_name)) == NULL)
        rb_sys_fail(NULL);
    sa->num = 0;
    return obj;
}

/*
 * Sufary#reopen(text [, array])
 *
 * Closes the current SUFARY handle and opens a new one through
 * sa_openfiles() (NULL array name when only one argument is supplied).
 * Resets the match range and returns self; calls rb_sys_fail() when
 * sa_openfiles() returns NULL, in which case the object is left closed.
 *
 * Both arguments are validated with Check_SafeStr() BEFORE their string
 * pointers are read, so a non-String argument is rejected instead of
 * being dereferenced as an RString.
 */
static VALUE fsufary_reopen(int ac, VALUE *av, VALUE obj)
{
    rb_sufary *sa;
    VALUE text, ary;
    char *array_name;
    int have_ary;

    have_ary = (rb_scan_args(ac, av, "11", &text, &ary) != 1);
    if (have_ary)
        Check_SafeStr(ary);
    Check_SafeStr(text);

    /* Pointers are fetched only after every check has passed. */
    array_name = have_ary ? RSTRING(ary)->ptr : NULL;

    GetSUFARY(obj, sa);
    sa_closefiles(sa->ary);
    if ((sa->ary = sa_openfiles(RSTRING(text)->ptr, array_name)) == NULL)
        rb_sys_fail(NULL);
    sa->num = 0;
    return obj;
}

/*
 * Sufary#close
 *
 * Closes the SUFARY handle and marks the object as closed by storing
 * NULL, so that later GetSUFARY() calls detect it.  Returns nil.
 *
 * The function is declared to return VALUE, so it must return one;
 * falling off the end would hand an indeterminate value to Ruby.
 */
static VALUE fsufary_close(VALUE obj)
{
    rb_sufary *sa;

    GetSUFARY(obj, sa);
    sa_closefiles(sa->ary);
    sa->ary = NULL;
    return Qnil;
}

/*
 * Sufary#id2line(pos)
 *
 * Returns, as a tainted String, the result of sa_getline() for text
 * position `pos'.  Returns nil when `pos' is negative or not below the
 * handle's txtsz, or when sa_getline() yields no string.
 */
static VALUE fsufary_id2line(VALUE obj, VALUE pos)
{
    rb_sufary *sa;
    int n;

    GetSUFARY(obj, sa);
    n = NUM2INT(pos);
    /* Reject both ends of the range; a negative position is as invalid
     * as one past the end of the text. */
    if (n < 0 || n >= sa->ary->txtsz)
        return Qnil;
    free(buf);
    buf = sa_getline(sa->ary, n);
    /* NOTE(review): assumes sa_getline() may return NULL on failure;
     * rb_tainted_str_new2() must not be handed a NULL pointer. */
    if (buf == NULL)
        return Qnil;
    return rb_tainted_str_new2(buf);
}

/*
 * Sufary#line(index) / Sufary#[](index)
 *
 * Returns, as a tainted String, the line for the `index'-th entry of the
 * current match range (0-based).  Returns nil when `index' lies outside
 * 0 .. num-1.
 */
static VALUE fsufary_line(VALUE obj, VALUE index)
{
    rb_sufary *self;
    int hit = NUM2INT(index);

    GetSUFARY(obj, self);

    if (hit >= 0 && hit < self->num) {
        /* Drop the previously returned string before fetching a new one. */
        free(buf);
        buf = sa_getline(self->ary,
                         sa_aryidx2txtidx(self->ary, self->ary->left + hit));
        return rb_tainted_str_new2(buf);
    }
    return Qnil;
}

/*
 * Sufary#pid2lid(pos)
 *
 * Maps text position `pos' to the position of the first character of the
 * line that contains it, by scanning txtmap backwards for the preceding
 * '\n'.  When `pos' itself is a newline it is treated as belonging to the
 * line it terminates.  Returns the line start as a Fixnum, or nil when
 * `pos' is outside 0 .. txtsz-1 (reading txtmap there would be out of
 * bounds).
 */
static VALUE fsufary_pid2lid(VALUE obj, VALUE pos)
{
    rb_sufary *sa;
    int n;

    n = NUM2INT(pos);
    GetSUFARY(obj, sa);
    if (n < 0 || n >= sa->ary->txtsz)
        return Qnil;
    /* Step off a terminating newline so the scan finds the previous one. */
    if (*(sa->ary->txtmap + n) == '\n')
        n--;
    while (0 <= n && *(sa->ary->txtmap + n) != '\n')
        n--;
    /* n is now -1 (first line) or the index of the preceding newline;
     * in both cases the line starts one character later. */
    if (n < 0)
        n = 0;
    else
        n++;
    return INT2FIX(n);
}

/*
 * Sufary#lid(index)
 *
 * Returns the line-start position (see fsufary_pid2lid) for the
 * `index'-th entry of the current match range, or nil when `index' lies
 * outside 0 .. num-1.
 */
static VALUE fsufary_lid(VALUE obj, VALUE index)
{
    rb_sufary *self;
    int hit = NUM2INT(index);

    GetSUFARY(obj, self);

    if (hit >= 0 && hit < self->num) {
        return fsufary_pid2lid(
            obj,
            INT2FIX(sa_aryidx2txtidx(self->ary, self->ary->left + hit)));
    }
    return Qnil;
}

/*
 * Sufary#getstr(pos, len)
 *
 * Returns, as a tainted String, the result of sa_getstr() for the given
 * position and length, or nil when sa_getstr() yields no string.
 *
 * The numeric arguments are converted BEFORE the shared buffer is freed:
 * NUM2INT() can raise, and raising after free(buf) would leave `buf'
 * dangling and cause a double free on the next call.
 */
static VALUE fsufary_getstr(VALUE obj, VALUE pos, VALUE len)
{
    rb_sufary *sa;
    int start, length;

    GetSUFARY(obj, sa);
    start = NUM2INT(pos);
    length = NUM2INT(len);

    free(buf);
    buf = sa_getstr(sa->ary, start, length);
    /* NOTE(review): assumes sa_getstr() may return NULL on failure;
     * rb_tainted_str_new2() must not be handed a NULL pointer. */
    if (buf == NULL)
        return Qnil;
    return rb_tainted_str_new2(buf);
}

/*
 * Sufary#get_all_pos
 *
 * With a block: yields the text position of every entry in the current
 * match range, in range order, and returns self.
 * Without a block: returns those positions as an Array, in the same order.
 * Returns nil when the stored match count is negative.
 */
static VALUE fsufary_get_all_pos(VALUE obj)
{
    rb_sufary *sa;
    VALUE rarray;
    int i, n;

    GetSUFARY(obj, sa);
    if ((n = sa->num) < 0)
        return Qnil;

    if (rb_iterator_p()) {
        for (i = 0; i < n; i++)
            rb_yield(INT2FIX(sa_aryidx2txtidx(sa->ary, sa->ary->left+i)));
        return obj;
    }

    /* Append in forward order: same result as unshifting in reverse, but
     * without shifting the whole array on every insertion. */
    rarray = rb_ary_new2(n);
    for (i = 0; i < n; i++)
        rb_ary_push(rarray,
                    INT2FIX(sa_aryidx2txtidx(sa->ary, sa->ary->left+i)));
    return rarray;
}

/*
 * Sufary#get_all_line
 *
 * With a block: yields the line (tainted String) of every entry in the
 * current match range, in range order, and returns self.
 * Without a block: returns those lines as an Array, in the same order.
 * Returns nil when the stored match count is negative.
 */
static VALUE fsufary_get_all_line(VALUE obj)
{
    rb_sufary *sa;
    VALUE rarray;
    int i, n;

    GetSUFARY(obj, sa);
    if ((n = sa->num) < 0)
        return Qnil;

    if (rb_iterator_p()) {
        for (i = 0; i < n; i++) {
            /* Release the previous line first; overwriting buf without
             * freeing it would leak one string per iteration. */
            free(buf);
            buf = sa_getline(sa->ary, sa_aryidx2txtidx(sa->ary, sa->ary->left+i));
            rb_yield(rb_tainted_str_new2(buf));
        }
        return obj;
    }

    /* Append in forward order: same result as unshifting in reverse, but
     * without shifting the whole array on every insertion. */
    rarray = rb_ary_new2(n);
    for (i = 0; i < n; i++) {
        free(buf);
        buf = sa_getline(sa->ary, sa_aryidx2txtidx(sa->ary, sa->ary->left+i));
        rb_ary_push(rarray, rb_tainted_str_new2(buf));
    }
    return rarray;
}

/*
 * Sufary#get_all_lid
 *
 * With a block: yields the line-start position (see fsufary_pid2lid) of
 * every entry in the current match range, in range order, and returns
 * self.
 * Without a block: returns the fsufary_lid() result for every index
 * 0 .. num-1 as an Array, in the same order.
 * Returns nil when the stored match count is negative.
 */
static VALUE fsufary_get_all_lid(VALUE obj)
{
    rb_sufary *sa;
    VALUE rarray;
    int i, n;

    GetSUFARY(obj, sa);
    if ((n = sa->num) < 0)
        return Qnil;

    if (rb_iterator_p()) {
        for (i = 0; i < n; i++)
            rb_yield(fsufary_pid2lid(
                obj,
                INT2FIX(sa_aryidx2txtidx(sa->ary, sa->ary->left+i))));
        return obj;
    }

    /* Append in forward order: same result as unshifting in reverse, but
     * without shifting the whole array on every insertion. */
    rarray = rb_ary_new2(n);
    for (i = 0; i < n; i++)
        rb_ary_push(rarray, fsufary_lid(obj, INT2FIX(i)));
    return rarray;
}

/*
 * Sufary#get_block(pos, start [, end])
 *
 * Returns, as a tainted String, the result of sa_getblock() for text
 * position `pos' with the given start and end strings; `end' defaults to
 * `start' when omitted.  Returns nil when sa_getblock() yields no string.
 *
 * `pos' is converted BEFORE the shared buffer is freed: NUM2INT() can
 * raise, and raising after free(buf) would leave `buf' dangling and cause
 * a double free on the next call.
 */
static VALUE fsufary_get_block(int ac, VALUE *av, VALUE obj)
{
    rb_sufary *sa;
    VALUE pos, start, end;
    int where;

    if (rb_scan_args(ac, av, "21", &pos, &start, &end) == 2)
        end = start;
    Check_SafeStr(start);
    Check_SafeStr(end);
    GetSUFARY(obj, sa);
    where = NUM2INT(pos);

    free(buf);
    buf = sa_getblock(sa->ary, where,
                      RSTRING(start)->ptr, RSTRING(end)->ptr);
    /* NOTE(review): assumes sa_getblock() may return NULL when no block
     * is found; rb_tainted_str_new2() must not be handed NULL. */
    if (buf == NULL)
        return Qnil;
    return rb_tainted_str_new2(buf);
}

/*
 * Sufary#common_prefix(key, sep)
 *
 * Calls sa_common_prefix_search() with `key' and the first byte of `sep'.
 * The result is a caller-freed long array whose element 0 is the number
 * of hits and whose elements 1..n are the hits.
 *
 * With a block: yields each hit as a Fixnum and returns self.
 * Without a block: returns the hits as an Array.
 * Returns nil when there are no hits or the search yields no result.
 *
 * The hits are copied into a Ruby Array and the C buffer is freed BEFORE
 * any block is invoked, so a re-entrant call from inside the block cannot
 * free the buffer that is still being iterated, and no result is kept
 * alive in static storage between calls.
 */
static VALUE fsufary_common_prefix(VALUE obj, VALUE key, VALUE sep)
{
    rb_sufary *sa;
    VALUE rarray;
    long *ret;
    long i, n;

    Check_SafeStr(key);
    Check_SafeStr(sep);

    GetSUFARY(obj, sa);
    /* Allocate the Ruby array first so that an allocation failure cannot
     * strand the C buffer. */
    rarray = rb_ary_new();
    ret = sa_common_prefix_search(sa->ary, RSTRING(key)->ptr, *(RSTRING(sep)->ptr));
    /* NOTE(review): assumes NULL signals failure — confirm against the
     * SUFARY library. */
    if (ret == NULL)
        return Qnil;

    n = ret[0];
    for (i = 1; i <= n; i++)
        rb_ary_push(rarray, INT2FIX(ret[i]));
    free(ret);

    if (n == 0)
        return Qnil;
    if (rb_iterator_p()) {
        for (i = 0; i < RARRAY(rarray)->len; i++)
            rb_yield(RARRAY(rarray)->ptr[i]);
        return obj;
    }
    return rarray;
}

/*
 * Comparison callback for ruby_qsort(): orders two VALUEs ascending and
 * returns -1, 0 or 1.
 *
 * The operands are compared rather than subtracted: VALUE is wider than
 * int on LP64 platforms, so truncating a difference to int can yield the
 * wrong sign.  Comparing as signed long keeps Fixnum-encoded integers in
 * numeric order.
 */
static int sufary_sort(VALUE *a, VALUE *b)
{
    long lhs = (long)*a;
    long rhs = (long)*b;

    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Sufary#lookup(str)
 *
 * Searches for `str' (as Sufary#search does), collects the text position
 * of every hit, sorts the positions ascending, and replaces each position
 * with its line (tainted String).
 *
 * With a block: yields each line in that order and returns self.
 * Without a block: returns the lines as an Array.
 * Returns nil when the stored match count is negative.
 */
static VALUE fsufary_lookup(VALUE obj, VALUE str)
{
    rb_sufary *sa;
    VALUE rarray;
    int i, n;

    fsufary_search(obj, str);
    GetSUFARY(obj, sa);
    if ((n = sa->num) < 0)
        return Qnil;

    /* Insertion order is irrelevant because the array is sorted right
     * below, so append instead of unshifting (which shifts the whole
     * array on every insertion). */
    rarray = rb_ary_new2(n);
    for (i = 0; i < n; i++)
        rb_ary_push(rarray,
                    INT2FIX(sa_aryidx2txtidx(sa->ary, sa->ary->left+i)));
    ruby_qsort(RARRAY(rarray)->ptr, RARRAY(rarray)->len,
             sizeof(VALUE), sufary_sort);

    /* Replace each sorted position in place with the line it falls on. */
    for (i = 0; i < RARRAY(rarray)->len; i++) {
        free(buf);
        buf = sa_getline(sa->ary, NUM2INT(RARRAY(rarray)->ptr[i]));
        RARRAY(rarray)->ptr[i] = rb_tainted_str_new2(buf);
    }

    if (rb_iterator_p()) {
        for (i = 0; i < RARRAY(rarray)->len; i++)
            rb_yield(RARRAY(rarray)->ptr[i]);
        return obj;
    }
    return rarray;
}

/*
 * Sufary#length / Sufary#size
 *
 * Returns the stored size of the current match range as a Fixnum.
 */
static VALUE fsufary_length(VALUE obj)
{
    rb_sufary *self;
    int hits;

    GetSUFARY(obj, self);
    hits = self->num;
    return INT2FIX(hits);
}

/*
 * Did.open(file) / Did.new(file)
 *
 * Opens `file' through sa_opendid() and returns the new Did object.
 * Calls rb_sys_fail() with the file name when sa_opendid() returns NULL.
 */
static VALUE fdid_s_open(VALUE class, VALUE file)
{
    rb_did *wrapper;
    VALUE self;

    Check_SafeStr(file);

    self = Data_Make_Struct(class, rb_did, 0, free_did, wrapper);
    wrapper->did = sa_opendid(RSTRING(file)->ptr);
    if (wrapper->did == NULL) {
        rb_sys_fail(RSTRING(file)->ptr);
    }
    return self;
}

/*
 * Did#reopen(file)
 *
 * Closes the current DID handle and opens `file' through sa_opendid().
 * Returns self; calls rb_sys_fail() with the file name when sa_opendid()
 * returns NULL.
 */
static VALUE fdid_reopen(VALUE obj, VALUE file)
{
    rb_did *wrapper;

    Check_SafeStr(file);
    GetDID(obj, wrapper);

    sa_closedid(wrapper->did);
    wrapper->did = sa_opendid(RSTRING(file)->ptr);
    if (wrapper->did == NULL) {
        rb_sys_fail(RSTRING(file)->ptr);
    }
    return obj;
}

/*
 * Did#close
 *
 * Closes the DID handle and marks the object as closed by storing NULL,
 * so that later GetDID() calls detect it.  Returns nil.
 *
 * The function is declared to return VALUE, so it must return one;
 * falling off the end would hand an indeterminate value to Ruby.
 */
static VALUE fdid_close(VALUE obj)
{
    rb_did *did;

    GetDID(obj, did);
    sa_closedid(did->did);
    did->did = NULL;
    return Qnil;
}

/*
 * Did#length / Did#size / Did#did_size
 *
 * Returns the value of sa_did_size() for the handle as a Fixnum.
 */
static VALUE fdid_length(VALUE obj)
{
    rb_did *wrapper;

    GetDID(obj, wrapper);

    return INT2FIX(sa_did_size(wrapper->did));
}

/*
 * Did#search(pos) / Did#didsearch(pos)
 *
 * Runs sa_didsearch() for `pos' and returns a three-element Array
 * [sa_doc_no, sa_doc_start, sa_doc_size] read back from the handle,
 * or nil when sa_doc_no() is negative.
 */
static VALUE fdid_search(VALUE obj, VALUE pos)
{
    rb_did *wrapper;
    VALUE triple;
    long doc_no;

    GetDID(obj, wrapper);
    sa_didsearch(wrapper->did, NUM2INT(pos));

    doc_no = sa_doc_no(wrapper->did);
    if (doc_no < 0) {
        return Qnil;
    }

    triple = rb_ary_new2(3);
    rb_ary_push(triple, INT2FIX(doc_no));
    rb_ary_push(triple, INT2FIX(sa_doc_start(wrapper->did)));
    rb_ary_push(triple, INT2FIX(sa_doc_size(wrapper->did)));
    return triple;
}

/*
 * Extension entry point: defines the Ruby classes "Sufary" and "Did"
 * under Object and registers their singleton methods, instance methods
 * and aliases.  Always returns 0.
 */
int Init_sufary(void)
{
    cSUFARY = rb_define_class("Sufary", rb_cObject);

    /* Constructors: "open" and "new" share one implementation. */
    rb_define_singleton_method(cSUFARY, "open", fsufary_s_open, -1);
    rb_define_singleton_method(cSUFARY, "new", fsufary_s_open, -1);

    /* Instance methods; arity -1 means the C function receives (argc, argv, self). */
    rb_define_method(cSUFARY, "reopen", fsufary_reopen, -1);
    rb_define_method(cSUFARY, "close", fsufary_close, 0);
    rb_define_method(cSUFARY, "init", fsufary_init, 0);
    rb_define_method(cSUFARY, "search0", fsufary_add_search, 1);
    rb_define_method(cSUFARY, "search", fsufary_search, 1);
    rb_define_method(cSUFARY, "line", fsufary_line, 1);
    rb_define_method(cSUFARY, "lid", fsufary_lid, 1);
    rb_define_method(cSUFARY, "get_all_pos", fsufary_get_all_pos, 0);
    rb_define_method(cSUFARY, "get_all_line", fsufary_get_all_line, 0);
    rb_define_method(cSUFARY, "get_all_lid", fsufary_get_all_lid, 0);
    rb_define_method(cSUFARY, "getstr", fsufary_getstr, 2);
    rb_define_method(cSUFARY, "id2line", fsufary_id2line, 1);
    rb_define_method(cSUFARY, "get_block", fsufary_get_block, -1);
    rb_define_method(cSUFARY, "pid2lid", fsufary_pid2lid, 1);
    rb_define_method(cSUFARY, "common_prefix", fsufary_common_prefix, 2);

    /* "[]" shares its implementation with "line"; "size" aliases "length". */
    rb_define_method(cSUFARY, "lookup", fsufary_lookup, 1);
    rb_define_method(cSUFARY, "[]", fsufary_line, 1);
    rb_define_method(cSUFARY, "length", fsufary_length, 0);
    rb_define_alias(cSUFARY,  "size", "length");
    
    cDID = rb_define_class("Did", rb_cObject);

    /* Constructors: "open" and "new" share one implementation. */
    rb_define_singleton_method(cDID, "open", fdid_s_open, 1);
    rb_define_singleton_method(cDID, "new", fdid_s_open, 1);

    /* Each alias is registered after the method it refers to. */
    rb_define_method(cDID, "reopen", fdid_reopen, 1);
    rb_define_method(cDID, "close", fdid_close, 0);
    rb_define_method(cDID, "length", fdid_length, 0);
    rb_define_alias(cDID,  "size", "length");
    rb_define_alias(cDID,  "did_size", "length");
    rb_define_method(cDID, "search", fdid_search, 1);
    rb_define_alias(cDID,  "didsearch", "search");

    return 0;
}
