/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2009 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <ctype.h>
#include "Error.h"
#include "lodmats.h"
#include "pdbMalloc.h"
#include "Coords.h"
#include "PDBCoords.h"
#include "MultiPose2MSA.h"
#include "ProcLAPACKSVD.h"
#include "ProcLAPACKSVDOcc.h"
#include "msa.h"


/* Forward declaration: returns 1 if 'index' lies inside at least one of the
   'range_num' inclusive ranges [lower[i], upper[i]], otherwise 0. */
static int
range_selxn(int index, int *lower, int *upper, int range_num);

/* Forward declaration: returns 1 if the atom 'name' is found in the atom-name
   list associated with selection 'mode', otherwise 0. */
static int
atom_selxn(char *name, int mode);

/* Colon-delimited lists of the atom names accepted by each selection mode. */
static const char atoms0[] = ":CA :C1*:";
static const char atoms1[] = ":N  :C  :O  :CA :";

/* Tests whether the atom 'name' occurs, as a substring, in the atom-name
   list that belongs to selection 'mode':
       mode 0 -> atoms0 (CA and C1*)
       mode 1 -> atoms1 (N, C, O and CA)
   Returns 1 on a match.  Returns 0 when there is no match or when 'mode'
   is neither 0 nor 1 (in which case 'name' is not examined). */
static int
atom_selxn(char *name, int mode)
{
    const char     *atomlist;

    if (mode == 0)
        atomlist = atoms0;
    else if (mode == 1)
        atomlist = atoms1;
    else
        return (0);

    return ((strstr(atomlist, name) != NULL) ? 1 : 0);
}


/* Reports whether 'index' falls inside at least one of the 'range_num'
   closed intervals [lower[r], upper[r]].
   Returns 1 if it does, 0 if it does not (including when range_num <= 0). */
static int
range_selxn(int index, int *lower, int *upper, int range_num)
{
    int             r = 0;

    while (r < range_num)
    {
        if (!(index < lower[r] || index > upper[r]))
            return (1);

        ++r;
    }

    return (0);
}


/*
char
aa3toaa1(char *aa3_s)
{
    int            index_i;

    index_i = (strstr(&aa3[0], aa3_s) - &aa3[0]) / 3;

    return (aa1[index_i]);
}


void
aa1toaa3(char aa1_c, char *aa3_s)
{
    int            i, index_i;

    index_i = (strchr(&aa1[0], aa1_c) - &aa1[0]) * 3;

    for (i = 0; i < 3; ++i)
        aa3_s[i] = aa3[index_i + i];

    aa3_s[3] = '\0';
}
*/


/* Cross-checks every structure against its aligned sequence.
   For structure s (whose sequence is msa->seq[map[s]]) the number of non-gap
   characters in the aligned sequence must equal the number of PDB atoms that
   atom_selxn() accepts for 'atomsel' and whose altLoc is blank or 'A'.
   The atom count is stored in baseA->coords[s]->aalen before the comparison.
   On the first mismatch an error is written to stderr and the program exits. */
static void
VerifyAlignmentVsPDBs(PDBCoordsArray *pdbA, CoordsArray *baseA, MSA *msa,
                      const int atomsel, int *map)
{
    const int       nstruct = pdbA->cnum;
    const int       ncols = msa->seqlen;
    int             s, pos, seqres, pdbres;
    char           *aligned = NULL;

    for (s = 0; s < nstruct; ++s)
    {
        aligned = msa->seq[map[s]];

        /* residues in the aligned sequence = everything that is not a gap */
        seqres = 0;
        pos = 0;
        while (pos < ncols)
        {
            if (aligned[pos] != '-')
                ++seqres;

            ++pos;
        }

        /* residues in the PDB = selected atoms in the primary conformation */
        pdbres = 0;
        for (pos = 0; pos < pdbA->coords[s]->vlen; ++pos)
        {
            if (atom_selxn(pdbA->coords[s]->name[pos], atomsel) != 1)
                continue;

            if (pdbA->coords[s]->altLoc[pos] != ' ' && pdbA->coords[s]->altLoc[pos] != 'A')
                continue;

            ++pdbres;
        }

        baseA->coords[s]->aalen = pdbres;

        if (seqres == pdbres)
            continue;

        fprintf(stderr, "\n\n  ERROR1122: PDB file '%s' and sequence '%s' in '%s'",
                pdbA->coords[s]->filename, msa->name[map[s]], msa->filename);
        fprintf(stderr, "\n            have different lengths (%d vs %d)\n\n",
                pdbres, seqres);
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }
}


/* Dumps the indices from a multiple sequence alignment into the segID field of pdb coords.

   For every structure i, walks the aligned sequence msa->seq[map[i]].  Each
   non-gap column j consumes one residue from the PDB coordinates: all
   consecutive atoms that share the current resSeq value get the 1-based,
   zero-padded column number ("%04d" of j+1) copied into their segID.

   Only four characters are copied into segID, so column numbers above 9999
   are truncated to their first four digits.

   The atom cursor is always bounds-checked before resSeq[] is read, and the
   column walk stops as soon as the structure has no atoms left. */
void
Align2segID(PDBCoordsArray *pdbA)
{
    int             i, j, k, vlen;
    const int       cnum = pdbA->cnum;
    int            *map = pdbA->seq2pdb->map;
    MSA            *msa = pdbA->seq2pdb->msa;
    char           *seq = NULL;
    int             resSeq;
    char            alnindex[5];

    for (i = 0; i < cnum; ++i)
    {
        k = 0;
        vlen = pdbA->coords[i]->vlen;
        seq = msa->seq[map[i]];

        /* stop once every atom has been labelled: reading resSeq[k] with
           k == vlen would be out of bounds */
        for (j = 0; j < msa->seqlen && k < vlen; ++j)
        {
            if (seq[j] == '-') /* a gap consumes no residue */
                continue;

            resSeq = pdbA->coords[i]->resSeq[k];

            /* snprintf cannot overrun alnindex, whatever the column number */
            snprintf(alnindex, sizeof(alnindex), "%04d", j+1);

            /* label every atom of the current residue; test the bound first */
            while (k < vlen && pdbA->coords[i]->resSeq[k] == resSeq)
            {
                strncpy(pdbA->coords[i]->segID[k], alnindex, 4);
                ++k;
            }
        }
    }
}


/* Reads an alignment -> PDB mapping file.

   Each useful line holds two whitespace-separated fields: a PDB file name and
   the name of the corresponding sequence in the alignment.  Blank lines and
   lines that do not yield two fields are ignored.

   Valid entries are stored contiguously in seq2pdb->pdbfile_name[] and
   seq2pdb->seqname[] at indices 0 .. seqnum-1, so that seq2pdb->seqnum always
   describes exactly the filled slots, even when the file contains blank or
   malformed lines.

   Returns a newly initialised Seq2PDB allocated for as many entries as the
   file has lines.  Exits the program if the file cannot be opened.

   NOTE(review): the "%s" conversions are unbounded; the size of the
   pdbfile_name[]/seqname[] buffers is set by Seq2pdbAlloc(), which is not
   visible here -- confirm they can hold a field of up to sizeof(line) chars. */
static Seq2PDB
*GetMapFile(char *mapfile_name)
{
    Seq2PDB        *seq2pdb;
    FILE           *mapfile = NULL;
    int             i, numscanned, seqnum, maxseqnum;
    char            line[FILENAME_MAX + 256];
    int             ch;

    seq2pdb = Seq2pdbInit();

    mapfile = fopen(mapfile_name, "r");
    if (mapfile == NULL)
    {
        fprintf(stderr,
                "\n  ERROR691: cannot open alignment -> PDB mapping file \"%s\" \n",
                mapfile_name);
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    /* first pass: count lines (newlines plus the final, possibly
       unterminated, line) to get an upper bound on the number of entries */
    maxseqnum = 0;
    do
    {
        ch = getc(mapfile);

        if (ch == EOF || ch == '\n')
            ++maxseqnum;
    }
    while (ch != EOF);

    Seq2pdbAlloc(seq2pdb, maxseqnum);

    rewind(mapfile);

    /* second pass: parse the entries, packing valid ones at index 'seqnum' */
    seqnum = 0;
    for (i = 0; i < maxseqnum; ++i)
    {
        if (fgets(line, sizeof(line), mapfile) == NULL)
            break;

        if (strlen(line) <= 1) /* empty line (at most a lone newline) */
            continue;

        numscanned = sscanf(line, "%s %s",
                            seq2pdb->pdbfile_name[seqnum], seq2pdb->seqname[seqnum]);

        /* an incomplete entry is not counted; its slot is reused by the next line */
        if (numscanned == 2)
            ++seqnum;
    }

    seq2pdb->seqnum = seqnum;

    fclose(mapfile);

    return(seq2pdb);
}


/* Legacy construction of the structure -> sequence map.
   For every mapfile entry e and every structure p: if the entry's PDB file
   name begins with the structure's file name (comparison limited to the
   length of the structure's file name), then every alignment sequence a whose
   name is a prefix of the entry's sequence name sets map[p] = a.  There is no
   early exit, so the last matching sequence wins.
   All three loops run over pdbA->cnum, and the result is written to
   pdbA->seq2pdb->map. */
void
GetSeq2PDBMap_old(PDBCoordsArray *pdbA, Seq2PDB *seq2pdb, MSA *msa)
{
    const int       cnum = pdbA->cnum;
    int            *map = pdbA->seq2pdb->map;
    int             entry, pdb, aln;

    for (entry = 0; entry < cnum; ++entry)
    {
        for (pdb = 0; pdb < cnum; ++pdb)
        {
            if (strncmp(seq2pdb->pdbfile_name[entry],
                        pdbA->coords[pdb]->filename,
                        strlen(pdbA->coords[pdb]->filename)) != 0)
                continue;

            /* mapfile entry 'entry' refers to structure 'pdb' */
            for (aln = 0; aln < cnum; ++aln)
            {
                if (strncmp(seq2pdb->seqname[entry],
                            msa->name[aln],
                            strlen(msa->name[aln])) != 0)
                    continue;

                /* alignment sequence 'aln' and structure 'pdb' match */
                map[pdb] = aln;
            }
        }
    }
}


/* Constructs an integer vector that maps a Coords structure to a sequence name in the alignment.

   1. The PDB file name of structure i is matched against the PDB file names
      listed in the mapfile (entries 0 .. seq2pdb->seqnum-1).
   2. The sequence name stored in the matching mapfile entry is then matched
      against the sequence names in the alignment.
   3. The result is recorded as pdbA->seq2pdb->map[i] = k, where k is the
      index of the corresponding sequence in the MSA.

   All names are compared after passing through getroot(), i.e. filename
   extensions are ignored, and the roots must be identical.

   msa->flag[] is first set to 1 for every sequence and reset to 0 for each
   sequence that has a structure; this allows for sequences in the MSA that
   aren't being superpositioned.

   Exits the program if a structure has no mapfile entry, or if a mapfile
   entry names a sequence that is not in the alignment. */
static void
GetSeq2PDBMap(PDBCoordsArray *pdbA, Seq2PDB *seq2pdb, MSA *msa)
{
    int             i, j, k;
    const int       cnum = pdbA->cnum;
    const int       mapnum = seq2pdb->seqnum; /* number of entries read from the mapfile */
    char            filename_root[FILENAME_MAX], pdbname_root[FILENAME_MAX];
    char            seqname_root[FILENAME_MAX], msaname_root[FILENAME_MAX];

    for (i = 0; i < msa->seqnum; ++i)
        msa->flag[i] = 1;

    for (i = 0; i < cnum; ++i)
    {
        /* snprintf bounds every copy by the destination size and always
           NUL-terminates */
        snprintf(filename_root, sizeof(filename_root), "%s",
                 getroot(pdbA->coords[i]->filename));

        for (j = 0; j < mapnum; ++j)
        {
            snprintf(pdbname_root, sizeof(pdbname_root), "%s",
                     getroot(seq2pdb->pdbfile_name[j]));

            if (strcmp(filename_root, pdbname_root) != 0)
                continue;

            /* mapfile entry j describes structure i: look up ITS sequence
               name (not the PDB file name) in the alignment */
            snprintf(seqname_root, sizeof(seqname_root), "%s",
                     getroot(seq2pdb->seqname[j]));

            for (k = 0; k < msa->seqnum; ++k)
            {
                snprintf(msaname_root, sizeof(msaname_root), "%s",
                         getroot(msa->name[k]));

                if (strcmp(seqname_root, msaname_root) == 0) /* now k (alignment) and i (pdb coords) match */
                {
                    pdbA->seq2pdb->map[i] = k;
                    msa->flag[k] = 0; /* reset msa flag to 0 since this structure exists */
                    break;
                }
            }

            if (k == msa->seqnum)
            {
                fprintf(stderr,
                        "\n  ERROR689: Sequence #%d (%s) in mapfile has no corresponding sequence in the alignment\n",
                        j+1, seq2pdb->seqname[j]);
                PrintTheseusTag();
                exit(EXIT_FAILURE);
            }

            break;
        }

        if (j == mapnum)
        {
            fprintf(stderr,
                    "\n  ERROR689: PDB file #%d (%s) has no corresponding entry in the mapfile\n",
                    i+1, pdbA->coords[i]->filename);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }
    }
}


/* Builds the structure -> sequence map without a mapfile, assuming that the
   sequence names and the PDB file names are identical once both have been
   passed through getroot().

   For structure i, seq2pdb->map[i] is set to the index of the first alignment
   sequence whose root name equals the root of the structure's file name.

   msa->flag[] is first set to 1 for every sequence and reset to 0 for each
   sequence that has a structure; this allows for sequences in the MSA that
   aren't being superpositioned.

   Exits the program if a structure has no matching sequence. */
static void
DefaultSeq2PDBMap(PDBCoordsArray *pdbA, Seq2PDB *seq2pdb, MSA *msa)
{
    int             i, j;
    const int       cnum = pdbA->cnum;
    char            filename_root[FILENAME_MAX], msaname_root[FILENAME_MAX];

    for (i = 0; i < msa->seqnum; ++i)
        msa->flag[i] = 1;

    for (i = 0; i < cnum; ++i)
    {
        /* the structure's root name does not depend on j: compute it once.
           snprintf bounds the copy by the destination size and always
           NUL-terminates */
        snprintf(filename_root, sizeof(filename_root), "%s",
                 getroot(pdbA->coords[i]->filename));

        for (j = 0; j < msa->seqnum; ++j)
        {
            snprintf(msaname_root, sizeof(msaname_root), "%s",
                     getroot(msa->name[j]));

            if (strcmp(filename_root, msaname_root) == 0)
            {
                seq2pdb->map[i] = j; /* now j (alignment) and i (pdb coords) match */
                msa->flag[j] = 0; /* reset msa flag to 0 since this structure exists */
                break;
            }
        }

        if (j == msa->seqnum)
        {
            fprintf(stderr,
                    "\n  ERROR690: PDB file #%d (%s) has no corresponding sequence in the alignment\n",
                    i+1, pdbA->coords[i]->filename);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }
    }
}


/* Marks the alignment columns that cannot take part in a superposition.
   For each of the msa->seqlen columns, counts the residues (non-gap
   characters) contributed by sequences whose flag is 0.  The flags are
   set/unset in GetSeq2PDBMap() or DefaultSeq2PDBMap(); a flag of 1 means the
   sequence has no PDB structural counterpart and is ignored.
   singletons[col] is set to 1 when the column holds one residue or none at
   all, and to 0 otherwise. */
static void
GetSingletons(int *singletons, MSA *msa)
{
    int             col, s, residues;

    for (col = 0; col < msa->seqlen; ++col)
    {
        residues = 0;

        for (s = 0; s < msa->seqnum; ++s)
        {
            if (msa->seq[s][col] == '-')
                continue;

            if (msa->flag[s] != 0)
                continue;

            ++residues;
        }

        /* both empty and single-residue columns are excluded */
        singletons[col] = (residues == 0 || residues == 1) ? 1 : 0;
    }
}


/* Flags every alignment column that is not ubiquitous, i.e. that contains at
   least one gap among the sequences whose flag is 0.  The flags are set/unset
   in GetSeq2PDBMap() or DefaultSeq2PDBMap(); sequences with flag 1 have no PDB
   structural counterpart and are ignored.
   ubiqs[col] is set to 1 for such columns.  Entries of columns without gaps
   are left exactly as they were when 'ubiqs' was passed in. */
static void
GetUbiqs(int *ubiqs, MSA *msa)
{
    int             col, s, has_gap;

    for (col = 0; col < msa->seqlen; ++col)
    {
        has_gap = 0;

        for (s = 0; s < msa->seqnum; ++s)
        {
            if (msa->seq[s][col] == '-' && msa->flag[s] == 0)
            {
                /* one gap is enough to flag the column */
                has_gap = 1;
                break;
            }
        }

        if (has_gap)
            ubiqs[col] = 1;
    }
}


/* Fills baseA with one slot per selected alignment column, for every structure.

   pdbA       source PDB coordinates (all atoms), plus the column ranges
              (lower[], upper[], range_num)
   baseA      destination; baseA->coords[j] receives up to 'vlen' slots
   seq2pdb    seq2pdb->map[j] is the index in 'msa' of the sequence for structure j
   msa        the multiple sequence alignment
   vlen       number of slots to fill in each baseA->coords[j]
   alignlen   number of columns in the alignment
   singletons per-column flags; columns with a non-zero flag are skipped

   For each structure three cursors advance together: m over the PDB atoms,
   p over the alignment columns and n over the output slots.  Only atoms
   accepted by atom_selxn() with altLoc ' ' or 'A' are paired with columns;
   all other atoms are stepped over.  A selected column yields either a copy
   of the atom (residue present) or a "GAP" placeholder (gap in the sequence). */
static void
GetCoordsFrAlignment(PDBCoordsArray *pdbA, CoordsArray *baseA,
                     Seq2PDB *seq2pdb, MSA *msa,
                     const int vlen, const int alignlen,
                     int *singletons)
{
    int             j, k, m, n, p, q;
    const int       cnum = pdbA->cnum;
    int            *map = seq2pdb->map;
    int            *upper = pdbA->upper; /* arrays of ints holding the upper and lower range bounds */
    int            *lower = pdbA->lower;
    int             range_num = pdbA->range_num;

	for (j = 0; j < cnum; ++j)
	{
	    k = map[j];
		m = n = p = 0;
		while(m < pdbA->coords[j]->vlen && n < vlen  && p < alignlen)
		{
			/* m = index of the current atom in the PDB coords */
			/* n = index of the next free slot in the baseA coords */
			/* p = index of the current alignment column */
			/* k = sequence index */
			/* j = pdb and baseA coords index */

			/* only selected atoms in the primary conformation are matched to columns */
			if (atom_selxn(pdbA->coords[j]->name[m], baseA->algo->atoms) == 1 &&
			    (pdbA->coords[j]->altLoc[m] == ' ' || pdbA->coords[j]->altLoc[m] == 'A'))
			{
				if (range_selxn(p, lower, upper, range_num) == 1 - baseA->algo->revsel && /* in-range, or out-of-range if revsel == 1 */
				    singletons[p] == 0) /* not a singleton */
				{
					if (msa->seq[k][p] != '-') /* not a gap */
					{
						/* residue present: copy atom m into slot n and consume both */
						strncpy(baseA->coords[j]->resName[n], pdbA->coords[j]->resName[m], 3);
						baseA->coords[j]->chainID[n] = pdbA->coords[j]->chainID[m];
						baseA->coords[j]->resSeq[n]  = pdbA->coords[j]->resSeq[m];
						baseA->coords[j]->x[n]       = pdbA->coords[j]->x[m];
						baseA->coords[j]->y[n]       = pdbA->coords[j]->y[m];
						baseA->coords[j]->z[n]       = pdbA->coords[j]->z[m];
						baseA->coords[j]->o[n]       = 1.0; /* presumably occupancy: slot holds real data -- confirm */
						baseA->coords[j]->b[n]       = pdbA->coords[j]->tempFactor[m];

						++m;
						++n;
					}
					else /* is a gap */
					{
						/* gap: write a zeroed placeholder into slot n; atom m is NOT
						   consumed, it belongs to a later column */
						strncpy(baseA->coords[j]->resName[n], "GAP", 3);
						baseA->coords[j]->chainID[n] = pdbA->coords[j]->chainID[m];
						baseA->coords[j]->resSeq[n]  = 0;
						baseA->coords[j]->x[n]       = 0.0;
						baseA->coords[j]->y[n]       = 0.0;
						baseA->coords[j]->z[n]       = 0.0;
						baseA->coords[j]->o[n]       = 0.0;
						baseA->coords[j]->b[n]       = 99.99; /* marker value for a gap inside the aligned region */

						++n;
					}
				}
				else /* column not selected: out of range, or a singleton */
				{
					/* no slot is written, but a residue in this column still
					   uses up its atom */
					if (msa->seq[k][p] != '-') /* not a gap */
					{
						++m;
					}

				}

				/* the column has been dealt with either way */
				++p;
			}
			else /* not the proper atom slxn */
			{
				++m;
			}
		}
		/*******************************************************************************************/
		/* if the end of the PDB is before the end of the alignment:
		   pad the remaining slots with placeholders.  Nothing is padded when
		   no atom was consumed (m == 0) or no slot was written (n == 0). */
		if (n < vlen && m != 0 && n != 0)
		{
			for (q = n; q < vlen; ++q)
			{
				strncpy(baseA->coords[j]->resName[q], "GAP", 3);
				baseA->coords[j]->chainID[q] = pdbA->coords[j]->chainID[m-1]; /* chain of the last atom visited */
				baseA->coords[j]->resSeq[q]  = 0;
				baseA->coords[j]->x[q]       = 0.0;
				baseA->coords[j]->y[q]       = 0.0;
				baseA->coords[j]->z[q]       = 0.0;
				baseA->coords[j]->o[q]       = 0.0;
				baseA->coords[j]->b[q]       = 66.66; /* marker value for trailing padding (differs from the 99.99 used above) */
			}
		}
	}
}


/* Parses the alignment-column selection and counts the selected columns.

   selection   colon-separated list of 1-based, inclusive column ranges, e.g.
               "1-20:35-80"; may be NULL.  The string is not modified.
               Each range is at most 127 characters; empty ranges (as in
               "1-5::9-12" or a trailing ':') are skipped.  A range that does
               not start with a digit selects the whole alignment.
   alignlen    number of columns in the alignment
   singletons  per-column flags; columns with a non-zero flag are never counted
   revsel      0: the ranges are the columns to use;
               otherwise: the ranges are the columns to leave out

   On return pdbA->lower[], pdbA->upper[] (0-based, inclusive) and
   pdbA->range_num describe the ranges.  If 'selection' is NULL or contains no
   range at all, a single range covering the whole alignment is stored.

   Returns the number of alignment columns that are selected and are not
   singletons.  Each column is counted once, so overlapping, reversed or
   partly out-of-bounds ranges cannot distort the count, and the result can
   never exceed 'alignlen'.

   Exits the program on allocation failure (ERROR989), when a range has no
   upper bound (ERROR987) or when an upper bound lies beyond the alignment
   (ERROR988). */
static int
ParseSelxns(PDBCoordsArray *pdbA, char *selection, const int alignlen, int *singletons, const int revsel)
{
    int             i, j, vlen, in_range;
    int             max_ranges, range_cnt;
    size_t          toklen;
    const char     *tok, *tokend, *dash, *rest;
    char            field[128]; /* NUL-terminated copy of one range */

    /* one more than the number of ':' is an upper bound on the number of ranges */
    max_ranges = 1;
    if (selection != NULL)
    {
        for (tok = selection; *tok != '\0'; ++tok)
            if (*tok == ':')
                ++max_ranges;
    }

    pdbA->lower = (int *) calloc(max_ranges, sizeof(int));
    pdbA->upper = (int *) calloc(max_ranges, sizeof(int));
    if (pdbA->lower == NULL || pdbA->upper == NULL)
    {
        perror("\n  ERROR");
        fprintf(stderr, " ERROR989: could not allocate memory for selections in GetCoordsSelection(). \n\n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    range_cnt = 0;
    tok = selection;
    while (tok != NULL && *tok != '\0')
    {
        tokend = strchr(tok, ':');
        if (tokend == NULL)
            tokend = tok + strlen(tok);

        toklen = (size_t) (tokend - tok);
        if (toklen > 0) /* skip empty ranges */
        {
            if (toklen > sizeof(field) - 1)
                toklen = sizeof(field) - 1;

            memcpy(field, tok, toklen);
            field[toklen] = '\0';

            /* parse residue number range; the terminating NUL is neither
               space nor digit, so the scans below cannot leave 'field' */
            rest = field;
            while (isspace((unsigned char) *rest))
                ++rest;

            if (isdigit((unsigned char) *rest))
            {
                pdbA->lower[range_cnt] = (int) strtol(rest, NULL, 10) - 1;

                /* the upper bound follows the dash; without a dash there is none */
                dash = strchr(rest, '-');
                rest = (dash != NULL) ? dash + 1 : rest + strlen(rest);
                while (isspace((unsigned char) *rest))
                    ++rest;

                if (isdigit((unsigned char) *rest))
                    pdbA->upper[range_cnt] = (int) strtol(rest, NULL, 10) - 1;
                else
                {
                    fprintf(stderr, "\n\n  ERROR987: one of the column selections has no upper bound. \n\n");
                    fprintf(stderr, "  ERROR987: upper limit %d = %s \n\n", range_cnt, rest);
                    PrintTheseusTag();
                    exit(EXIT_FAILURE);
                }

                if (pdbA->upper[range_cnt] >= alignlen)
                {
                    fprintf(stderr, "\n\n  ERROR988: one of the column selections is out of bounds for the alignment. \n");
                    fprintf(stderr, "  ERROR988: upper limit %d = %d; alignment length = %d \n\n",
                            range_cnt, pdbA->upper[range_cnt], alignlen);
                    PrintTheseusTag();
                    exit(EXIT_FAILURE);
                }
            }
            else
            {
                /* no leading number: the range stands for the whole alignment */
                pdbA->lower[range_cnt] = 0;
                pdbA->upper[range_cnt] = alignlen - 1;
            }

            ++range_cnt;
        }

        /* step over the delimiter, or stop on the terminating NUL */
        tok = (*tokend == ':') ? tokend + 1 : tokend;
    }

    /* no selection given (or nothing usable in it): use every column */
    if (range_cnt == 0)
    {
        pdbA->lower[0] = 0;
        pdbA->upper[0] = alignlen - 1;
        range_cnt = 1;
    }

    pdbA->range_num = range_cnt;

    /* count each usable column exactly once */
    vlen = 0;
    for (i = 0; i < alignlen; ++i)
    {
        if (singletons[i] != 0) /* singletons are never used */
            continue;

        in_range = 0;
        for (j = 0; j < pdbA->range_num; ++j)
        {
            if (i >= pdbA->lower[j] && i <= pdbA->upper[j])
            {
                in_range = 1;
                break;
            }
        }

        /* normal selection keeps in-range columns, reverse selection the others */
        if ((revsel == 0) == (in_range == 1))
            ++vlen;
    }

    return(vlen);
}


/* Sets up 'baseA' for superpositioning the structures in 'pdbA' according to
   a multiple sequence alignment.

   msafile_name  alignment file, read with getmsa()
   mapfile_name  optional file that maps PDB file names to sequence names;
                 if NULL, the sequence names are assumed to match the PDB
                 file names

   Steps: read the mapfile (if any) and the alignment, build the
   structure -> sequence map, flag the alignment columns that cannot be used
   (singletons, plus every column containing a gap when algo->missing == 1),
   parse the column selection, allocate baseA for the selected columns, verify
   that each structure has as many residues as its aligned sequence, and
   finally copy the coordinates.

   Exits the program if the alignment has fewer sequences than there are
   structures (ERROR682), if the atom selection is not CA (ERROR683), or if
   memory cannot be allocated; the routines called here exit on their own
   errors. */
void
Align2MSA(PDBCoordsArray *pdbA, CoordsArray *baseA, char *msafile_name, char *mapfile_name)
{
    int             i, alnlen, vlen;
    const int       cnum = pdbA->cnum;
    MSA            *msa;
    int            *singletons = NULL;
    Algorithm      *algo = baseA->algo;

    if (mapfile_name == NULL)
    {
        pdbA->seq2pdb = Seq2pdbInit();
        Seq2pdbAlloc(pdbA->seq2pdb, cnum);
    }
    else
    {
        pdbA->seq2pdb = GetMapFile(mapfile_name);
    }

    pdbA->seq2pdb->msa = msa = getmsa(msafile_name);

    if (msa->seqnum < pdbA->cnum)
    {
        fprintf(stderr,
                "\n  ERROR682: # alignment sequences (%d) < # coords (%d)\n",
                msa->seqnum, pdbA->cnum);
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    alnlen = msa->seqlen;

    /* request at least one element so that a NULL result always means failure */
    singletons = malloc((alnlen > 0 ? (size_t) alnlen : 1) * sizeof(int));
    if (singletons == NULL)
    {
        perror("\n  ERROR");
        fprintf(stderr, " ERROR: could not allocate memory for singletons in Align2MSA(). \n\n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    if (mapfile_name != NULL)
        GetSeq2PDBMap(pdbA, pdbA->seq2pdb, msa);
    else
        DefaultSeq2PDBMap(pdbA, pdbA->seq2pdb, msa);

    /* flag unusable columns; with algo->missing == 1 every column that
       contains a gap is flagged as well */
    GetSingletons(singletons, msa);
    if (algo->missing == 1)
        GetUbiqs(singletons, msa);

    vlen = ParseSelxns(pdbA, algo->selection, alnlen, singletons, algo->revsel);

    /* allocate a CoordsArray based on this alignment */
    if (algo->atoms == 0)
    {
        CoordsArrayAlloc(baseA, cnum, vlen);
    }
    else
    {
        fprintf(stderr,
                "\n  ERROR683: atom selection must be CAs for superpositioning with a sequence alignment\n");
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < cnum; ++i)
        strncpy(baseA->coords[i]->filename, pdbA->coords[i]->filename, FILENAME_MAX - 1);

    VerifyAlignmentVsPDBs(pdbA, baseA, msa, algo->atoms, pdbA->seq2pdb->map);
    GetCoordsFrAlignment(pdbA, baseA, pdbA->seq2pdb, msa, vlen, alnlen, singletons);

    free(singletons);
}


/* Computes a rotation for every structure in 'cdsA' against the average
   structure and returns the sum of the per-structure deviations.

   The routine used depends on the algorithm settings:
     covweight != 0                      -> ProcLAPACKSVDCovOcc() with cdsA->WtMat
     varweight != 0 or leastsquares != 0 -> ProcLAPACKSVDOcc() with cdsA->w
   If neither applies, the deviation of the previous structure (initially 0.0)
   is used again.

   Each structure's wRMSD_from_mean is set to sqrt(deviation / cdsA->vlen).
   With algo->verbose == 1 one line per structure is printed. */
double
CalcRotationsOcc(CoordsArray *cdsA)
{
    double          dev = 0.0, total = 0.0;
    Coords         *cur = NULL;
    int             idx = 0;

    while (idx < cdsA->cnum)
    {
        cur = cdsA->coords[idx];

        if (cdsA->algo->covweight != 0)
        {
            dev = ProcLAPACKSVDCovOcc(cur, cdsA->avecoords, cur->matrix,
                                      (const double **) cdsA->WtMat,
                                      (const double *) cdsA->axesw,
                                      cdsA->tmpmat3a, cdsA->tmpmat3b, cdsA->tmpmat3c,
                                      cdsA->tmpvec3a);
        }
        else if (cdsA->algo->varweight != 0 || cdsA->algo->leastsquares != 0)
        {
            dev = ProcLAPACKSVDOcc(cur, cdsA->avecoords, cur->matrix,
                                   (const double *) cdsA->w,
                                   (const double *) cdsA->axesw,
                                   cdsA->tmpmat3a, cdsA->tmpmat3b, cdsA->tmpmat3c,
                                   cdsA->tmpvec3a);
        }

        /* rmsd from mean would usually need a 2 in denom,
           but this is already deviation from mean,
           since structure #2 is the average structure */
        cur->wRMSD_from_mean = sqrt(dev / cdsA->vlen);
        total += dev;

        if (cdsA->algo->verbose == 1)
            printf("CalcRotationsOcc:%5d %8.3f %13.3f \n",
                   idx + 1, cur->wRMSD_from_mean, dev);

        ++idx;
    }

    return(total);
}


/* char aa1[] = "ARNDCQEGHILKMFPSTWYV"; */
/* char aa3[] = "ALAARGASNASPCYSGLNGLUGLYHISILELEULYSMETPHEPROSERTHRTRPTYRVAL"; */

/* Writes the sequence of every structure in 'pdbA' to a FASTA file.

   The output file is named "<PDB filename>.fst".  It holds a header line
   (">" followed by the PDB filename) and one letter per residue, 72 letters
   per line.  A residue is taken from each ATOM/HETATM record whose atom name
   is "CA " or "C1*" and whose altLoc is ' ' or 'A'.

   The three-letter residue name is looked up in the packed table 'aan3' and
   translated with 'aan1'; only hits that start on a three-character boundary
   count, so a name that merely straddles two table entries is not mistaken
   for a residue.  Unknown residues are written as 'X'.

   A summary line per file is printed to stdout.  Exits the program if the
   output file name does not fit, or if the file cannot be opened or closed. */
void
pdb2fst(PDBCoordsArray *pdbA)
{
    int             i, j, count, aaindex, namelen;
    char           *pindex = NULL;
    FILE           *fp;
    char            outfile[FILENAME_MAX];

    for (i = 0; i < pdbA->cnum; ++i)
    {
        /* build the name in one bounded step and refuse a truncated name */
        namelen = snprintf(outfile, sizeof(outfile), "%s.fst", pdbA->coords[i]->filename);
        if (namelen < 0 || (size_t) namelen >= sizeof(outfile))
        {
            fprintf(stderr,
                    "\n  ERROR: FASTA file name for \"%s\" is too long \n",
                    pdbA->coords[i]->filename);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        fp = fopen(outfile, "w");
        if (fp == NULL)
        {
            perror("\n  ERROR");
            fprintf(stderr,
                    "\n  ERROR: cannot open FASTA file \"%s\" for writing \n",
                    outfile);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }

        fprintf(fp, ">%-s", pdbA->coords[i]->filename);

        count = 0;
        for (j = 0; j < pdbA->coords[i]->vlen; ++j)
        {
            if ( (strncmp(pdbA->coords[i]->record[j], "ATOM  ", 6) == 0 || strncmp(pdbA->coords[i]->record[j], "HETATM", 6) == 0)
                &&
                (strncmp(pdbA->coords[i]->name[j], "CA ", 3) == 0 || strncmp(pdbA->coords[i]->name[j], "C1*", 3) == 0) &&
                (pdbA->coords[i]->altLoc[j] == ' ' || pdbA->coords[i]->altLoc[j] == 'A'))
            {
                /* start a new line before the first residue and after every 72 */
                if (count % 72 == 0)
                    fputc('\n', fp);

                /* accept only hits aligned on a three-letter boundary */
                pindex = strstr(aan3, pdbA->coords[i]->resName[j]);
                while (pindex != NULL && (pindex - &aan3[0]) % 3 != 0)
                    pindex = strstr(pindex + 1, pdbA->coords[i]->resName[j]);

                if (pindex == NULL)
                    fputc('X', fp);
                else
                {
                    aaindex = (int) (pindex - &aan3[0])/3;
                    fputc(aan1[aaindex], fp);
                }

                ++count;
            }
        }

        fputc('\n', fp);

        printf("    %4d %s  (%d aa)\n", i+1, outfile, count);

        fflush(NULL);

        /* buffered write errors surface here */
        if (fclose(fp) != 0)
        {
            perror("\n  ERROR");
            fprintf(stderr,
                    "\n  ERROR: could not finish writing FASTA file \"%s\" \n",
                    outfile);
            PrintTheseusTag();
            exit(EXIT_FAILURE);
        }
    }
}
