#include #include #include #include #define SIZE 300 //#define NSQ 100 // no of sub sequences #define display 0 void Subsequence(int, int); void splicer(); void LCS(char *, char*, char *, int, int&); void substring(char* ,char* , int , int ); void mergeSub(char* ,char* ,char* , char * , int); void align(char**, int, char*, ofstream&); void display_html_original(ofstream& , char **, int); void display_html_1(ofstream&, char** ); void display_html_2(ofstream&, char* ,char*, float); void display_html_merged(ofstream&, char *); //ofstream fout2; void main(int argc, char* argv[]) { char newSeq[SIZE+1], nextSeq[SIZE+1], overlap[SIZE+1], mergedSeq[SIZE+1]; char mainSeq[SIZE+1]; int noSeq; int overlapSize,oldOverSize=0; int type; //int NSQ=100; int NSQ=atoi(argv[1]); time_t seconds1, seconds2; float totaltime; int i,j; ifstream ifp; ifp.open("subseq.dat"); //ifp>>noSeq; //same as NSQ, for test data ofstream fout2; if(display==1) fout2.open("../public_html/code/genome.html"); char **data; data= new char*[NSQ]; for(i=0; i endse) { temp=start; start=endse; endse=temp; } while(start==endse)// regenerate sub sequence if empty { start=rand()%300; endse=rand()%300; if(start > endse) { temp=start; start=endse; endse=temp; } } cout<<"Start "<0 && type>0) //has a match, creates an empty mergedSubst if not checked, to be used later to define size of overlap { mergeSub(data[i], data[j],overlap, mergedSeq, type); //show only new sequences if((strlen(mergedSeq) > strlen(data[i])) && (strlen(mergedSeq) > strlen(data[j]))) { // cout<<"\n..........................................................."<= strlen(maxSeq)) { memcpy(maxSeq,mergedSeq,strlen(mergedSeq)*sizeof(char)+1); } } } }//for //copy the longest sub sequence memcpy(data[i], maxSeq, strlen(maxSeq)*sizeof(char)+1); i++; }//while ////Combine the results into one final sequence j=0; int oldmax; maxlen=0; while((j < nsq) && (maxlen<300)) { oldmax=maxlen; maxSeq[0]='\0'; if(strlen(data[j])!=0) { for(i=0; i0 && type>0) //has a match, creates an empty mergedSubst if not checked, to be used later to define size of overlap { mergeSub(data[j], data[i],overlap, mergedSeq, type); //show only new sequences if((strlen(mergedSeq) > strlen(data[i])) && (strlen(mergedSeq) > strlen(data[j]))) { // cout<<"\n..........................................................."<= strlen(maxSeq)) { //if(display) //display_html_original(fout2,data, nsq); memcpy(maxSeq,mergedSeq,strlen(mergedSeq)*sizeof(char)+1); if(strlen(mergedSeq) >= maxlen) maxlen=strlen(maxSeq); } }//if }//if }//for //copy the longest sub sequence data[j]=maxSeq; //cout<<"new res="<= strlen(mRes)) { if(display) display_html_original(fout2, data, nsq); memcpy(mRes,data[j],strlen(data[j])*sizeof(char)+1); //cout<<"Max so far"< oldmax)) { j=0; } }//while } /********************************************** Function to put back the subsequences, using LCS -Dynamic programming Notes:Conventions used -1 for up, -2 for left and -3 for up and left in the table x and y are the sub sequences to be compared, y is the overlap, style is the type of overlap contiguous for 2 and 1 for non-contiguous. **********************************************/ void LCS(char *x, char *y, char *z, int style, int &type) { //variables int i,j, xstart, ystart, xend, yend; int x_l, y_l; //cout<<"***********************"<=c[i][j-1]) { c[i][j]=c[i-1][j]; b[i][j]=-1; } else { c[i][j]=c[i][j-1]; b[i][j]=-2; } } //the largest no is the length of the common string int len=c[x_l][y_l]; int start, stop=0, max, oldstart, oldstop, ctr, len2=0; int jcopy; z[len]='\0'; i=x_l; j=y_l; //back track oldstop=0; stop=0; ctr=0; oldstart=0; yend=0; start=i; jcopy=j; //if(style==1) { while(i>0 || j>0) { if(b[i][j]==-3) { i--; j--; len--; z[len]=x[i]; if(ctr==0) { start=i; jcopy=j; stop=0; ctr=1; } else { stop++; if(i==0) { if(stop>=oldstop) { oldstop=stop; oldstart=start; yend=jcopy; } } } } else if(b[i][j]==-1) { i--; if(stop>=oldstop) { oldstop=stop; oldstart=start; yend=jcopy; } ctr=0; } else if(b[i][j]==-2) { j--; if(stop>=oldstop) { oldstop=stop; oldstart=start; yend=jcopy; } ctr=0; } } } //style 1 int i1,j1, count=0, oldcount=0; i=oldstart-oldstop+1; j=yend-oldstop+1; oldstart=xstart=i; ystart=j; if(style==2) { while( i<=x_l && j<=y_l &&(b[i][j]==-3)) { i++; j++; count++; } xend=oldstart=i-1; yend=j-1; oldstop=count; } if(oldstop ==0) { z[0]='\0'; type=0; // no match } else if(oldstop>0 && style==2) { xstart=oldstart-oldstop; xend=oldstart; ystart=yend-oldstop; oldstart--; substring(z, x, xstart, oldstart); //we only want it if its more than 1 char //Determine if the common substring is at the ends if((xend ==x_l) && (ystart==0)) //suffix of x prefix of y { type = 1; } else if((xstart==0) && (yend==y_l)) //suffix of y prefix of x { type = 2; } else type = 0; // not a suffix or prefix } //free space for(i=0; i<=x_l; i++) { delete[] b[i]; delete[] c[i]; } delete[] b; delete[] c; }// if x_l !=0 .... } /********************************************** Function to merge two subsequences based on an exact match at the edges. **********************************************/ void mergeSub(char* x,char* y,char* overlap, char * z, int type) { int i,j; int len_m=strlen(overlap); int len_x=strlen(x); int len_y=strlen(y); if(type==1) // suffix of x prefix of y { strcpy(z,x); //copy x to new string for(i=len_x, j=len_m; j"; //display_html_original(fout2,data); } void display_html_2(ofstream& fout2, char *orig, char *merge, float time) { //also display time, length etc.... fout2<<"

"; fout2<<"
Original Genome Sequence:"<<""<"; fout2<<"
Length of Sequence: "<"; fout2<<"
Final Genome Sequence     :"<<""<"; fout2<<"
Length of Sequence: "<"; fout2<<"
Time:"<<""< seconds"; fout2<<""; } void display_html_original(ofstream& fout2, char **data, int NSQ) { int i,j; int len; // char seq[DATA+1]; fout2<<"
"; fout2<<""; for(i=0; i"; if(strlen(data[i])<16 && strlen(data[i])> 0) { if(i%2) { fout2<<"
"; fout2<
"; } else { fout2<<"
"; fout2<
"; } } else if(strlen(data[i])>=16) { len=strlen(data[i]); if(i%2) { fout2<<"
"; fout2<
"; // fout2<<"

-------"<"; fout2<"; //fout2<"; } } fout2<<""; // fout2<<" \; \; \;"; } fout2<<""; } void display_html_merged(ofstream& fout2, char *merged) { int len; len=strlen(merged); fout2<<"

"; fout2<<"
"; fout2<<"
"; fout2<<"The longest Subsequence found so far...
"<"<<"Lenght ="<"; }