#include<fstream.h>
#include <iostream.h>
#include <iostream.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>

//#define SIZE 300
//#define NSQ  100  // no of sub sequences
#define display 0
void Subsequence(int, int);
void splicer();
void LCS(char *, char*, char *, int, int&); 
void substring(char* ,char* , int , int );
void mergeSub(char* ,char* ,char* , char * , int);
void align(char**, int, char*, ofstream&);
void display_html_original(ofstream& , char **, int);
void display_html_1(ofstream&, char** );
void display_html_2(ofstream&, char* ,char*, float);
void display_html_merged(ofstream&, char *);
void display_error(ofstream& , int , int);

//ofstream fout2;

int SIZE=300; // the size of the genome sequence

int main(int argc, char* argv[])
{	
	
	char newSeq[SIZE+1], nextSeq[SIZE+1], overlap[SIZE+1], mergedSeq[SIZE+1];
	char mainSeq[SIZE+1];

	int noSeq;
	int overlapSize,oldOverSize=0;
	int type;

	//int NSQ=100;
	
	SIZE=atoi(argv[1]);  // get the size of the main sequence from the user

	int NSQ=atoi(argv[2]); //get the number of subsequences desired

	//if sequence size is small and number of subsequences are less than two display an error message
	if(SIZE < 10 || NSQ <2 )
	{
	if(display ==1)
	{
 //If user chooses display, a html file is created that contains the results.
        ofstream fout2;
	 fout2.open("genome.html");
	 display_error(fout2, SIZE, NSQ);	
	}		
	return 0;
	}

	cout<<SIZE<<"  "<<NSQ<<" "; 
	
	time_t seconds1, seconds2;
	float totaltime;

	int i,j;
	ifstream ifp;
	ifp.open("subseq.dat"); // a file containing the sequences and subsequences which will be used for alignment, this feature will be used later!

	//If user chooses display, a html file is created that contains the results.
	ofstream fout2;
	if(display==1)
	  {
	 //If user chooses display, a html file is created that contains the results.
        //ofstream fout2;


	  // getenv("HOME");
	  //fout2.open("/public_html/code/genome.html");
	    fout2.open("genome.html");	  
	}
	//create data structure to store the sub sequences
	char **data;
	data= new char*[NSQ]; //allocate memory to data NSQxSIZE  
	for(i=0; i<NSQ; i++)
	{
		data[i]= new char[SIZE+1];
	}


	// generate test data
	Subsequence(SIZE,NSQ);

	//start timer
	seconds1 = time (NULL);

	//read the main sequence from the file
	ifp.getline(mainSeq, SIZE+1, '\n');

	//read all thesubsequences into the variable data
	for( i=0; i<NSQ; i++)
	{
		ifp.getline(data[i], SIZE+1, '\n');
	}

	//If display is turned on, the display file is initialized
	if(display==1)
	   display_html_1(fout2, data);	
	

	//aling sequences
	align(data, NSQ, mergedSeq, fout2);

	//stop timer
	seconds2 = time (NULL);

//	cout<<"\n\n ------------Main Sequence---------------\n "<<mainSeq<<endl;
//	cout<<"\n\n ------------Final Sequence--------------------\n "<<mergedSeq;

//	cout<<"\n\n ------------Length of final sequence: "<<strlen(mergedSeq);

	cout<<strlen(mergedSeq);

	//check whether the aligned sequences matches the main sequence and give message to user
	if(!strcmp(mainSeq,mergedSeq))
	{
		//cout<<"\n\nThey match!!\n";
		cout<<" ";
	}
	else
	{
		//cout<<"\n\nCould not get the entire sequence but the largest!!\n";
		cout<<" ";
	}

	totaltime=(seconds2-seconds1); //   /(60.0);
	//cout<<"\nTotal time for assembly is "<<(seconds2-seconds1)<<" seconds "<<endl;
	cout<<totaltime<<endl;

	//display the final results
	if(display==1)
	   display_html_2(fout2, mainSeq, mergedSeq, totaltime);
	
	//cout<<"1="<<NSQ;

		
	
//free space
/*
for(i=0; i<=NSQ; i++)
{  
	delete[] data[i];
//	delete[] mergeRes[i];
}
delete[] data;
//delete[] mergeRes;

*/
return 0;

}

/**********************************************

Function to chop the sequence into subsequences

**********************************************/
void Subsequence(int seqSize, int noSubSeq)
{
	
	srand(time(NULL));

	char nucleo[4]={'A', 'T', 'C', 'G'};	
	char seq[SIZE+1];
	int start, endse, temp;
	
	//output data to a file
	ofstream fout;
	fout.open("subseq.dat");

//	cout<<"Sequence .........................\n";

	int i;
	for(i=0; i<seqSize; i++)
	{
		seq[i]=nucleo[rand()%4];	
		//cout<<seq[i];
		fout<<seq[i];
	}
	//save the actual sequence for comparison
	fout<<endl;

	//fout<<noSubSeq<<endl;

	for(i=0; i<noSubSeq; i++)
	{
//		cout<<"\nSub Sequence "<<i<<" .........................\n";
		start=rand()%seqSize; 
		endse=rand()%seqSize;
		if(start > endse)
		{
			temp=start;
			start=endse;
			endse=temp;
		}
		while(start==endse)// regenerate sub sequence if empty
		{
			start=rand()%seqSize; 
			endse=rand()%seqSize;
			
			if(start > endse)
			{
				temp=start;
				start=endse;
				endse=temp;
			}

		}
//		cout<<"Start "<<start<<" End "<<endse<<endl;

		int j;
		for( j=start; j<=endse; j++)	
		{ 
	//		cout<<seq[j];
			fout<<seq[j];
		}
		fout<<endl;
	}
	
	fout.close();

}

/**********************************************

Function to aling sequences, exact alignment 

**********************************************/	
void align(char **data, int nsq, char *mRes, ofstream& fout2)
{
		
	char newSeq[SIZE+1], nextSeq[SIZE+1], overlap[SIZE+1], mergedSeq[SIZE+1],maxSeq[SIZE+1];
	int noSeq;
	int overlapSize,oldOverSize=0;
	int type;
	int i,j, maxlen=0;


	
	mRes[0]='\0';
	i=0;
	
	while(i<nsq)
	{
		maxSeq[0]='\0'; 
		
		//the original sub sequence
	//	memcpy(maxSeq,data[i],strlen(data[i])*sizeof(char)+1);
		
		
		//next one onwards
		for( j=i+1; j<nsq; j++)
		{
			//find LCS
			LCS(data[i],data[j],overlap,2, type);
			
			overlapSize=strlen(overlap);
			
			if(overlapSize>0 && type>0)	//has a match, creates an empty mergedSubst if not checked, to be used later to define size of overlap  
			{
				mergeSub(data[i], data[j],overlap, mergedSeq, type);
			
				//show only new sequences
				if((strlen(mergedSeq) > strlen(data[i])) && (strlen(mergedSeq) > strlen(data[j])))
				{
				//	cout<<"\n..........................................................."<<endl;
				//	cout<<"\n\nSequence 1  Length:"<<strlen(data[i])<<"\n"<<data[i];
				//	cout<<"\n\nSequence 2 Length:"<<strlen(data[j])<<"\n"<<data[j];
				//	cout<<endl<<i<<":"<<j<<" Over lap "<<overlap<<endl;
				//	cout<<"\n\nMerged Sequence Length :"<<strlen(mergedSeq)<<"\n"<<mergedSeq;				
				
					//pick up the longest merged subsequence   
					if(strlen(mergedSeq) >= strlen(maxSeq))
					{
						memcpy(maxSeq,mergedSeq,strlen(mergedSeq)*sizeof(char)+1);
					}
				}
	
			}
		
	
		
		}//for

		//copy the longest sub sequence
		memcpy(data[i], maxSeq, strlen(maxSeq)*sizeof(char)+1);

		i++;
	}//while

	
	////Combine the results into one final sequence
	j=0;
	int oldmax;
	maxlen=0;
	while((j < nsq) && (maxlen<SIZE))
	{
	        oldmax=maxlen;
		maxSeq[0]='\0';
		
		if(strlen(data[j])!=0)
		{
			for(i=0; i<nsq; i++)
			{
		        if(strlen(data[i])!=0){
				//find LCS
				LCS(data[j],data[i],overlap,2, type);
			
				overlapSize=strlen(overlap);
			
				if(overlapSize>0 && type>0)	//has a match, creates an empty mergedSubst if not checked, to be used later to define size of overlap  
				{
					mergeSub(data[j], data[i],overlap, mergedSeq, type);
			
					//show only new sequences
					if((strlen(mergedSeq) > strlen(data[i])) && (strlen(mergedSeq) > strlen(data[j])))
					{
					//	cout<<"\n..........................................................."<<endl;
					//	cout<<"\n\nSequence 1  Length:"<<strlen(mergeRes[i])<<"\n"<<mergeRes[i];
					//	cout<<"\n\nSequence 2  Length:"<<strlen(mergeRes[j])<<"\n"<<mergeRes[j];
					//	cout<<endl<<i<<":"<<j<<" Over lap "<<overlap<<endl;
					//	cout<<"\n\nMerged Sequence Length :"<<strlen(mergedSeq)<<"\n"<<mergedSeq;				
				
						//pick up the longest merged subsequence   
						if(strlen(mergedSeq) >= strlen(maxSeq))
						{
							//if(display)
						  //display_html_original(fout2,data, nsq);

							memcpy(maxSeq,mergedSeq,strlen(mergedSeq)*sizeof(char)+1);
		
							if(strlen(mergedSeq) >= maxlen)
								maxlen=strlen(maxSeq);
						}
					}//if
	
				}//if
			}//if	
			}//for

			//copy the longest sub sequence
			
			data[j]=maxSeq;
			//cout<<"new res="<<mergeRes[j];
		
		
		
		} //if
		
		
		if(strlen(data[j]) >= strlen(mRes))
		{
		 if(display==1)
		  display_html_original(fout2, data, nsq);
		  
			memcpy(mRes,data[j],strlen(data[j])*sizeof(char)+1);
			//cout<<"Max so far"<<mRes<<endl;
			maxlen=strlen(mRes);
			//cout<<"Max len"<<maxlen;
			
			if(display==1)
			display_html_merged(fout2, mRes);
		}
		j++;
		
		//if the string is changing we continue
		if((j==nsq) && (maxlen > oldmax))
		  {
		    j=0;
		  }

	}//while

 }

/**********************************************

Function to put back the subsequences, using LCS -Dynamic programming

Notes:Conventions used -1 for up, -2 for left and -3 for up and left in the table

  x and y are the sub sequences to be compared, y is the overlap, style is the type of overlap
  contiguous for 2 and 1 for non-contiguous.  

**********************************************/
void LCS(char *x, char *y, char *z, int style, int &type)
{

//variables	
int i,j, xstart, ystart, xend, yend;

int x_l, y_l;

//cout<<"***********************"<<x<<endl<<"***************"<<y<<endl;
x_l=strlen(x);   //lenght of string x and y
y_l=strlen(y);

if(x_l!=0 && y_l!=0)
{

int **c, **b;		//allocate space
c= new int* [x_l+1];
b= new int* [x_l+1];

for(i=0; i<=x_l; i++)
{
	b[i]=new int[y_l+1];
	c[i]=new int[y_l+1];

	for(j=0; j<=y_l; j++)
		b[i][j]=0;
}


for(i=0; i<=x_l; i++)   //initialize the tables b and c
{
		c[i][0]=0;
		b[i][0]=-1;   //-1 indicates up
}


for(i=0; i<=y_l; i++)   //initialize the tables b and c
{
		c[0][i]=0;
		b[0][i]=-2;  //-2 indicates left
}


//dynamic programming, constructs the table that can be used to reconstruct the LCS

for(i=1; i<=x_l; i++)
	for(j=1; j<=y_l; j++)
	{
		if(x[i-1]==y[j-1])  //two characters in the strings match
		{
			c[i][j]=c[i-1][j-1]+1;
		
				b[i][j]=-3;           //indicates up and left
		
		}
		else if(c[i-1][j]>=c[i][j-1])
		{
			c[i][j]=c[i-1][j];
			b[i][j]=-1;
		}
		else
		{
			c[i][j]=c[i][j-1];
			b[i][j]=-2;
		}
	
	}

	//the largest no is the length of the common string
	int len=c[x_l][y_l];
	int start, stop=0, max, oldstart, oldstop, ctr, len2=0;

	int jcopy;

	
	z[len]='\0';

	i=x_l; j=y_l;

//back track
	oldstop=0; stop=0; ctr=0;
	oldstart=0; yend=0;
	start=i; jcopy=j;

	//if(style==1)
{
while(i>0 || j>0)
{
	if(b[i][j]==-3)
	{
		i--; j--;
		len--;
		z[len]=x[i];

		
		if(ctr==0)
		{
			start=i;
			jcopy=j;
			stop=0;
			ctr=1;
		}
		else
		{

			stop++;

			if(i==0)
			{
				if(stop>=oldstop)
				{
					oldstop=stop;
					oldstart=start;
					yend=jcopy;
				}

			}
		}
	}
	else if(b[i][j]==-1)
	{
		i--;
	
		if(stop>=oldstop)
		{
			oldstop=stop;
			oldstart=start;
			yend=jcopy;

		}
		ctr=0;
		
	}
	else if(b[i][j]==-2)
	{
		j--;

		if(stop>=oldstop)
		{
			oldstop=stop;
			oldstart=start;
			yend=jcopy;
		}	
		ctr=0;
		
	}

}
} //style 1


int i1,j1, count=0, oldcount=0;

i=oldstart-oldstop+1;
j=yend-oldstop+1;

oldstart=xstart=i;
ystart=j;



if(style==2)
{
	while( i<=x_l && j<=y_l &&(b[i][j]==-3))
	{

		i++;
		j++;
	count++;
	
	}
	xend=oldstart=i-1;
	yend=j-1;
	oldstop=count;
}



if(oldstop ==0)
{
	z[0]='\0';
	type=0;   // no match
}
else if(oldstop>0 && style==2) 
{	
	xstart=oldstart-oldstop;
	xend=oldstart;

	ystart=yend-oldstop;
	
	oldstart--;
	substring(z, x, xstart, oldstart); //we only want it if its more than 1 char
	
	//Determine if the common substring is at the ends
	if((xend ==x_l) &&  (ystart==0)) //suffix of x prefix of y
	{

		type = 1;
	}
	else if((xstart==0) && (yend==y_l))  //suffix of y prefix of x
	{

		type = 2;
	}
	else
		type = 0; // not a suffix or prefix
}




//free space

for(i=0; i<=x_l; i++)
{  
	delete[] b[i];
	delete[] c[i];
}
delete[] b;
delete[] c;

}// if x_l !=0 ....

  }

/**********************************************

Function to merge two subsequences based on 
an exact match at the edges.

 
**********************************************/
void mergeSub(char* x,char* y,char* overlap, char * z, int type)
{
	int i,j;
	int len_m=strlen(overlap);
	int len_x=strlen(x);
	int len_y=strlen(y);

	if(type==1) // suffix of x prefix of y
	{
		strcpy(z,x); //copy x to new string
		for(i=len_x, j=len_m; j<len_y; i++,j++)
		{
			z[i]=y[j];
		}
		z[i]='\0';

	}
	else if (type ==2)  //suffix of y prefix of x
	{
		strcpy(z,y); //copy x to new string
		for(i=len_y, j=len_m; j<len_x; i++,j++)
		{
			z[i]=x[j];
		}
		z[i]='\0';
	}
	else if (type ==0)
	{
		z[0]='\0';
	}

}

/**********************************************

Function to put back the subsequences-Not in Use

**********************************************/
void splicer(int type)
{


}
/************************************************************/
void substring(char* y,char* x, int start, int len)
{
  int i,j;
	for( i=start, j=0; i<=len; i++,j++)
	{
		y[j]=x[i]; 
	}
	y[j]='\0';
}

/*******************************************************
*
*
*
********************************************************/


void display_html_1(ofstream& fout2, char **data)
{

	int i;
	fout2<<"<html><head></head><body>";


	//display_html_original(fout2,data);


}
void display_html_2(ofstream& fout2, char *orig, char *merge, float time)
{	
	//also display time, length etc....
	fout2<<"<hr><br><h3>";

	fout2<<"<br>Original Genome Sequence:"<<"<u>"<<orig<<"</u>";
        fout2<<"<br>Length of Sequence: <u>"<<strlen(orig)<<"</u>";

	fout2<<"<br>Final Genome Sequence&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;:"<<"<u>"<<merge<<"</u>";
	fout2<<"<br>Length of Sequence: <u>"<<strlen(merge)<<"</u>";
	fout2<<"<br>Time:"<<"<u>"<<time<<"</u> seconds";
	fout2<<"</body></html>";
}
void display_html_original(ofstream& fout2, char **data, int NSQ)
{
	int i,j;
	int len;
//	char seq[DATA+1];

	fout2<<"<hr><h6>";
	fout2<<"<table><tr>";

	for(i=0; i<NSQ; i++)
	{
		fout2<<"<td>";
		if(strlen(data[i])<16 && strlen(data[i])> 0)
		{
			
			if(i%2)
			{
				fout2<<"<table border=2 bordercolor=\"black\"><tr><td>";
				fout2<<data[i]<<"</td></tr></table>";
			}
			else
			{
				fout2<<"<table border=2 bordercolor=\"red\"><tr><td>";
				fout2<<data[i]<<"</td></tr></table>";
			}
		}
		else if(strlen(data[i])>=16)
		{
			len=strlen(data[i]);
			if(i%2)
			{
				
				fout2<<"<table border=2 bordercolor=\"black\"><tr><td>";
				fout2<<data[i][0]<<data[i][1]<<data[i][2]<<data[i][3];
				fout2<<data[i][4]<<data[i][5]<<data[i][6]<<data[i][7];
				fout2<<data[i][8]<<"..."<<data[i][len-8]<<data[i][len-7]<<data[i][len-6];
				fout2<<data[i][len-4]<<data[i][len-3]<<data[i][len-2]<<data[i][len-1]<<"</td></tr></table>";
		//		fout2<<"<br><br>-------"<<data[i]<<"-----";
			}	
			else
			{
				fout2<<"<table border=2 bordercolor=\"red\"><tr><td>";
				fout2<<data[i][0]<<data[i][1]<<data[i][2]<<data[i][3];
				fout2<<data[i][4]<<data[i][5]<<data[i][6]<<data[i][7];
				fout2<<data[i][8]<<"..."<<data[i][len-8]<<data[i][len-7]<<data[i][len-6];
				fout2<<data[i][len-4]<<data[i][len-3]<<data[i][len-2]<<data[i][len-1]<<"</td></tr></table>";
				//fout2<<data[i]<<"</td></tr></table>";
			
			}	
		}
		fout2<<"</td>";
		//	fout2<<"&nbsp\;&nbsp\;&nbsp\;";
	}
	fout2<<"</tr></table>";
}
void display_html_merged(ofstream& fout2, char *merged)
{
	
	int len;
	len=strlen(merged);

	fout2<<"<hr><h4>";
	fout2<<"<br>";
	fout2<<"<table><tr><td color=blue>";
	fout2<<"The longest Subsequence found so far...<br>"<<merged<<"<br>"<<"Lenght ="<<len<<"<br>";

}
void display_error(ofstream& fout2, int SIZE, int NSQ)
{
 //also display time, length etc....
        fout2<<"<html><head></head><body>";

	fout2<<"<hr><br><h3>";

        fout2<<"<br>The Program did not run"<<"<u>"<<"The reason is below"<<"</u>";
        fout2<<"<br>Length of Original Sequence: <u>"<<SIZE<<"</u>";

        fout2<<"<br>Number of SubSequences: <u>"<<NSQ<<"</u>";
        fout2<<" Length of Original Sequence should be greater than 10 and number of subsequences should be more than 2";
        fout2<<"</body></html>";

}











