/*
 * Fast index for tag data, based on integer indexes
 *
 * Copyright (C) 2006  Enrico Zini <enrico@debian.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <tagcoll/StringIndex.h>

#include <algorithm>

using namespace std;
using namespace Tagcoll;

/**
 * Find the position of a string in the index.
 *
 * Performs a binary search over data(0) .. data(size() - 1), comparing with
 * strcmp; the search is only meaningful if those strings are in ascending
 * strcmp order.
 *
 * @param str  0-terminated string to look for
 * @return the position of the string, or -1 if it is not in the index
 */
int StringIndex::data(const char* str) const
{
	// lo: last position known to hold a string <= str (-1: none yet)
	// hi: first position known to hold a string > str (size(): none yet)
	int lo = -1;
	int hi = size();

	for (int gap = hi - lo; gap > 1; gap = hi - lo)
	{
		const int mid = lo + gap / 2;
		if (strcmp(data(mid), str) > 0)
			hi = mid;
		else
			lo = mid;
	}

	// lo is now the only candidate; a miss is reported as -1, not as an
	// exception
	if (lo >= 0 && strcmp(data(lo), str) == 0)
		return lo;
	return -1;
}


/**
 * Find the position of a string among the strings mapped so far.
 *
 * Performs a binary search on the `data` vector, which map() keeps in
 * ascending order.
 *
 * @param item  string to look for
 * @return the position of the string in `data`, or -1 if it is not there
 */
int StringIndexer::operator()(const std::string& item) const
{
	// Everything at or before `lower` compares <= item, everything at or
	// after `upper` compares > item
	int lower = -1;
	int upper = data.size();

	while (upper - lower > 1)
	{
		const int probe = (upper + lower) / 2;
		if (item < data[probe])
			upper = probe;
		else
			lower = probe;
	}

	// A miss is reported as -1, not as an exception
	const bool found = lower != -1 && data[lower] == item;
	return found ? lower : -1;
}

/**
 * Add a string to the set of mapped strings, keeping `data` sorted.
 *
 * Strings that are already present are not added again.
 *
 * @param str  string to add
 */
void StringIndexer::map(const std::string& str)
{
	// A single binary search yields both answers we need: whether the string
	// is already there, and where it has to go if it is not
	std::vector<std::string>::iterator pos =
		std::lower_bound(data.begin(), data.end(), str);

	if (pos != data.end() && *pos == str)
		return;

	data.insert(pos, str);
}

/**
 * Compute the number of bytes that encode() writes.
 *
 * The encoded form consists of one int offset per string, followed by all
 * the strings, each with its terminating 0. No item count is included.
 *
 * @return the size in bytes of the buffer to pass to encode()
 */
int StringIndexer::encodedSize() const
{
	// Offset table: one int per mapped string
	int total = data.size() * sizeof(int);

	// String area: the characters of each string plus the trailing 0
	for (std::size_t n = 0; n < data.size(); ++n)
		total += data[n].size() + 1;

	return total;
}

/**
 * Serialise the mapped strings into a buffer.
 *
 * Layout: data.size() ints, the n-th being the byte offset (from the start
 * of buf) of the n-th string, followed by the strings themselves, each
 * 0-terminated, in the order they appear in `data`.
 *
 * @param buf  destination buffer; encodedSize() bytes are written to it
 */
void StringIndexer::encode(char* buf) const
{
	// The first string starts right after the offset table
	int offset = data.size() * sizeof(int);
	char* slot = buf;

	for (vector<string>::const_iterator i = data.begin(); i != data.end(); ++i)
	{
		// Store the offset with memcpy rather than through a casted int*, so
		// that buf is not required to be aligned for int
		memcpy(slot, &offset, sizeof(offset));
		slot += sizeof(offset);

		// Copy the string together with its terminating 0
		const std::size_t len = i->size() + 1;
		memcpy(buf + offset, i->c_str(), len);
		offset += len;
	}
}


#ifdef COMPILE_TESTSUITE

#include <tests/test-utils.h>

namespace tut {
using namespace tut_tagcoll;

// File used to hold the index for the duration of each test
static const char* fname = "tagcoll_stringindex.tmp";

// Test fixture: the constructor builds a StringIndexer and hands it to a
// MasterMMapIndexer opened on fname; the destructor removes the file.
struct tagcoll_stringindex_shar {
	tagcoll_stringindex_shar() {
		// Deliberately unsorted and with repeated entries ("pizza" and
		// "lasagne" appear twice)
		static const char* const dishes[] = {
			"pizza",
			"spaghetti",
			"tortellini",
			"lasagne",
			"polpettone",
			"friggione",
			"arrosto",
			"pizza",
			"lasagne",
		};
		const std::size_t count = sizeof(dishes) / sizeof(dishes[0]);

		MasterMMapIndexer master(fname);

		StringIndexer indexer;
		for (std::size_t i = 0; i < count; ++i)
			indexer.map(dishes[i]);

		master.append(indexer);
		master.commit();
	}

	~tagcoll_stringindex_shar() {
		// Remove the index file created by the constructor
		unlink(fname);
	}
};
TESTGRP(tagcoll_stringindex);

// Read back the index written by the fixture and check its contents
template<> template<>
void to::test<1>()
{
	// The distinct strings mapped by the fixture, in ascending order
	static const char* const sorted[] = {
		"arrosto",
		"friggione",
		"lasagne",
		"pizza",
		"polpettone",
		"spaghetti",
		"tortellini",
	};
	const int count = sizeof(sorted) / sizeof(sorted[0]);

	MasterMMapIndex master(fname);

	// Read the index
	StringIndex index(master, 0);

	// Duplicates must have been stored only once
	ensure_equals(index.size(), 7u);

	// Position -> string: the strings must come out in sorted order
	for (int i = 0; i < count; ++i)
		ensure_equals(string(index.data(i)), string(sorted[i]));

	// String -> position: every string must be found at its own position
	for (int i = 0; i < count; ++i)
		ensure_equals(index.data(sorted[i]), i);
}

}

#endif
// vim:set ts=4 sw=4:
