/**************************************************************************
 *                                                                        *
 *   Copyright (C) 2001 Grub, Inc.                                        *
 *                                                                        *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 1, or (at your option)  *
 *   any later version.                                                   *
 *                                                                        *
 *   This program is distributed in the hope that it will be useful,      *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of       *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
 *   GNU General Public License for more details.                         *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.            *
 *                                                                        *
 *                                                                        *
 **************************************************************************/   

#include "ClientDB.h"
#include <iostream>

/*
 * Method:	ClientDB()  (constructor)
 * Description:	Unlinks the CDB data files through deleteArchive() and
 *		then creates the two database objects, one on
 *		SERVER_DB_DAT and one on CRAWLER_DB_DAT.
 * NOTE:	A failure reported by deleteArchive() is only logged;
 *		construction continues regardless.
 */
ClientDB::ClientDB()
{
	/* Unlink the CDB data files before the database objects are created */
	if ( deleteArchive() == -1 ) {

		clog(GCLOG_ERR,
			"Failed to delete archive deleteArchive(): %s",
			strerror(errno));
	}

	server_db = new ServerDB(SERVER_DB_DAT);

	/* The destructor is not run for an object whose constructor
	 * throws, so the first database has to be released here if
	 * creating the second one fails.
	 */
	try {
		crawler_db = new CrawlerDB(CRAWLER_DB_DAT);

	} catch ( ... ) {

		delete server_db;
		server_db = NULL;
		throw;
	}
}

/*
 * Method:	~ClientDB()  (destructor)
 * Description:	Releases the two database objects created by the
 *		constructor, server_db first and crawler_db second.
 */
ClientDB::~ClientDB()
{
	delete this->server_db;
	delete this->crawler_db;
}
	
/*
 * Method:	GetInsert()
 * Description:	-- called by GET PROTOCOL --
 *		Builds a temporary record from the given URL, size and
 *		checksum and hands it to the server database before the
 *		crawling begins. It is meant to be invoked as the GET
 *		part of the protocol is executing.
 * Input:	URL -- Url to be stored (must not be NULL)
 *		size -- previous size of page in bytes
 *		CRC -- previous calculated checksum of the contents
 * Returns: 	0 once the record has been passed to the database.
 * NOTE:	Plain operator new reports exhaustion by throwing, not
 *		by returning NULL. Any exception raised while the record
 *		is built or inserted is propagated to the caller after
 *		the temporary record has been released.
 */
int ClientDB::GetInsert(const char *URL,unsigned long size,unsigned long CRC)
{
	assert( URL != NULL );

	URLHandler *handle = new URLHandler();

	try {
		/* Private copy of the URL, including its terminator */
		handle->URL = new char[strlen(URL)+1];
		strcpy(handle->URL, URL);

		handle->CRC = CRC;
		handle->size = size;

		server_db->InsertServedUrl( handle );

	} catch ( ... ) {

		/* Do not leak the temporary record on failure */
		delete handle;
		throw;
	}

	/* size and CRC are unsigned long, hence %lu */
	clog(GCLOG_DEBUG,
		"GetInsert(): '%s': size=%lu, CRC=%lu",
		URL,size,CRC);

	/* The record is deleted here, right after the insert call */
	delete handle;

	return 0;
}

/*
 * Method:	GetRetrieve()
 * Description:	-- called by CRAWLER --
 *		Allocates a record and asks the server database to fill
 *		it with a URL that was stored by GetInsert(), so that
 *		the crawler can use it in crawling.
 * Returns: 	A ptr to a URLHandler structure, or NULL when the
 *		served urls table is empty or the database raised a
 *		GrubExp while filling the record.
 * NOTE:	The user of this method needs to free the
 *		memory used by struct handle by calling delete.
 */
URLHandler * ClientDB::GetRetrieve()
{
	if ( server_db->tbl_size < 1 ) return (NULL);

	URLHandler *handle = new URLHandler();

	try {
		server_db->GetServedUrl( handle );

	} catch ( GrubExp& exp ) {

		clog(GCLOG_ERR,
			"Caught Exception in ClientDB::GetRetrieve(): %s",
			exp.what()
		);

		/* The record is unusable: release it and report the
		 * failure right away. Falling through to the debug
		 * trace below would dereference a NULL handle.
		 */
		delete handle;
		return (NULL);
	}

	clog(GCLOG_DEBUG, "GetRetrieve(): %s", handle->URL);

	return handle;
}

/*
 * Method:	CrawlInsert()
 * Description: -- called by CRAWLER --
 * 		This method provides the crawler modules with
 *		the capability of storing the data which has
 *		been crawled. A temporary record holding private
 *		copies of the arguments is built, handed to the
 *		crawler database and then released.
 * Input:	URL -- Url to be stored
 *		contents -- Url contents retrieved (size bytes are copied)
 *		size -- size of page in bytes
 *		CRC -- calculated checksum of the contents
 *		status -- (see protocol specs and ClientDBRecord.h)
 *		MIME -- the mime of the examined document
 *		redirURL -- a URL that the site redirects to
 *		None of the pointer arguments may be NULL.
 * Return:	SUCCESS_CDB -- Everything went well.
 * NOTE:	Plain operator new reports exhaustion by throwing, not
 *		by returning NULL. Any exception raised while the record
 *		is built or inserted is rethrown after the temporary
 *		record has been released.
 */
int ClientDB::CrawlInsert( const char *URL, const char *contents, unsigned long size,
			unsigned long CRC, status_t status, const char *MIME,
			const char *redirURL) throw ( GrubExp )
{
	/* Validate the arguments before anything is allocated */
	assert( URL != NULL && contents != NULL &&
		MIME != NULL && redirURL != NULL );

	URLHandler *handle = new URLHandler();

	try {
		handle->URL = new char[strlen(URL)+1];
		strcpy(handle->URL, URL);

		/* Increase the size of the contents by one, in order
		 * to put a Null character at the end, in case we
		 * want to treat the contents as a string.
		 */
		handle->content = new char[size+1];
		memcpy( handle->content, contents, size );
		handle->content[size] = '\0';

		handle->size = size;
		handle->CRC = CRC;
		handle->status = status;

		handle->redirURL = new char[strlen(redirURL)+1];
		strcpy(handle->redirURL, redirURL);

		handle->MIME = new char[strlen(MIME)+1];
		strcpy(handle->MIME, MIME);

		crawler_db->InsertCrawledUrl( handle );

	} catch ( ... ) {

		/* Do not leak the temporary record on failure */
		delete handle;
		throw;
	}

	clog(GCLOG_DEBUG,
		"CrawlInsert(): '%s: size=%lu CRC=%lu STATUS=%d MIME=%s'",
		handle->URL,handle->size,handle->CRC,handle->status,handle->MIME
	);

	/* The record is deleted here, right after the insert call */
	delete handle;

	return SUCCESS_CDB;
}
/*
 * Method:	CrawlRetrieve
 * Description:	-- called by PUT PROTOCOL --
 * 		Retrieve the data from the DB after crawling
 *		has been completed and stored in it.
 * Input:	handle -- address of a URLHandler pointer (must not be
 *		NULL). *handle has to be either NULL or a record
 *		obtained with new: whatever it points to is deleted and
 *		replaced by a freshly allocated record which the
 *		crawler database fills in.
 * Returns: 	ERROR CODES if an error occurred, otherwise 0.
 *			- CRAWLED_URLS_TBL_EMPTY - The crawled urls
 *			  table holds no records; *handle is left
 *			  untouched in that case.
 * Throws:	GrubExp -- rethrown when the database fails to fill the
 *		record. The record is released and *handle is set to
 *		NULL before the exception leaves this method.
 * NOTE:	The user of this method needs to free the memory used
 * 		by struct "handle" by calling delete.
 */
int ClientDB::CrawlRetrieve( URLHandler **handle ) throw (GrubExp)
{
	assert( handle != NULL );

	if ( crawler_db->tbl_size < 1 ) return (CRAWLED_URLS_TBL_EMPTY);

	/* Drop the record the caller passed in; delete on NULL is a no-op */
	delete *handle;
	*handle = NULL;

	*handle = new URLHandler();

	try {
		crawler_db->GetCrawledUrl( *handle );

	} catch ( GrubExp& exp ) {

		clog(GCLOG_ERR,
			"Caught Exception in ClientDB::CrawlRetrieve(): %s",
			exp.what()
		);

		/* Clean up BEFORE rethrowing: nothing placed after the
		 * throw statement would ever be executed.
		 */
		delete *handle;
		*handle = NULL;
		throw;
	}

	clog(GCLOG_DEBUG,"CrawlRetrieve(): '%s'", (*handle)->URL);

	return 0;
}

/*
 * Method:	recordCount
 * Description:	Reports the current value of tbl_size of the crawler
 *		database, i.e. the size of the crawled urls table.
 * Returns:	That size, converted to int.
 */
int ClientDB::recordCount()
{
	const int crawled_records = crawler_db->tbl_size;

	return crawled_records;
}

/*
 * Method:	emptyArchive
 * Description:	It will delete the records of both databases, served and
 *		crawled urls.
 */
void ClientDB::emptyArchive()
{
	server_db->EmptyTable();
	crawler_db->EmptyTable();
}

/*
 * Method:	deleteArchive
 * Description:	Unlinks both CDB data files, SERVER_DB_DAT and
 *		CRAWLER_DB_DAT. Both files are always attempted, even
 *		when the first one cannot be removed.
 * Returns:	0 when neither file exists any more afterwards, -1 when
 *		at least one of them could not be removed. In the -1
 *		case errno holds the error of the first failing unlink().
 * NOTE:	A file that is already absent (ENOENT) is not treated as
 *		a failure: there is nothing left to delete.
 */
int ClientDB::deleteArchive()
{
	const char *const archives[] = { SERVER_DB_DAT, CRAWLER_DB_DAT };
	const unsigned int archive_count = sizeof(archives) / sizeof(archives[0]);

	int ret = 0;
	int first_error = 0;

	for ( unsigned int i = 0; i < archive_count; ++i ) {

		if ( unlink(archives[i]) != -1 ) continue;

		/* Already gone: the goal of this call is met for this file */
		if ( errno == ENOENT ) continue;

		/* Remember the first real failure; a later unlink() call
		 * may overwrite errno before the caller gets to read it.
		 */
		if ( ret == 0 ) first_error = errno;

		ret = -1;
	}

	if ( ret == -1 ) errno = first_error;

	return ret;
}
