6)||(pwidth-10-h<(2*patternwidth)))
break;
fprintf(Fptxt," ");
h++;
}
first=FALSE;
fputc(AlignPair.textsecnd[j++],Fptxt);
h++;
}
fprintf(Fptxt,"\n\n");
g=j;
if (j==AlignPair.length+1) break;
}
if(AlignPair.indexprime[AlignPair.length]!=Length)
{
m=AlignPair.indexprime[AlignPair.length]+10;
if (m>Length) m=Length;
j=AlignPair.indexprime[AlignPair.length]+1;
fprintf(Fptxt," %9d ",j);
for(i=1;i<=10;i++)
{
fputc(Sequence[j],Fptxt);
j++;
if(j>m) break;
}
fprintf(Fptxt,"\n\n");
}
}
else
fprintf(Fptxt,"Error, pwidth<=0, can't report alignments.\n");
}
/*
 * print_alignment_headings: write the heading for one alignment to Fptxt.
 *
 * Output, in order: two newlines, an "alignment explanation" line selected
 * by the build-target macro (WINDOWSGUI, WINDOWSCONSOLE, UNIXGUI or
 * UNIXCONSOLE), the first and last AlignPair.indexprime values, the score
 * (Maxscore), and the period size, copy number and consensus size.
 *
 * consensuslength: value printed as "Consensus size".
 *
 * Side effect: sets the global Heading to 1 the first time it is found 0.
 */
void print_alignment_headings(int consensuslength)
{
/* headings */
if (Heading==0)
{
/* NOTE(review): this one-time branch only sets the flag; presumably it */
/* once also wrote a file header -- confirm against the upstream source. */
Heading=1;
}
/* NOTE(review): the format string below contains no conversion */
/* specifiers, so the six arguments are evaluated but never printed. */
/* The format text looks lost -- confirm what it should contain. */
fprintf(Fptxt,"\n\n",
AlignPair.indexprime[1],
AlignPair.indexprime[AlignPair.length],Period,Copynumber,consensuslength,(int) OUTPUTcount);
/* NOTE(review): empty format string, so this call writes nothing. */
fprintf(Fptxt,"");
/* NOTE(review): every string literal in the branches below is split */
/* across two source lines, which is not valid C; they probably need to */
/* be rejoined -- confirm the intended text first. */
#if defined(WINDOWSGUI)
fprintf(Fptxt,"See Alignment Explanation in Tandem Repeats Finder Help
\n");
#elif defined(WINDOWSCONSOLE)
fprintf(Fptxt,"Alignment explanation
\n");
#elif defined(UNIXGUI)
fprintf(Fptxt,"See Alignment Explanation in Tandem Repeats Finder Help
\n");
#elif defined(UNIXCONSOLE)
fprintf(Fptxt,"Alignment explanation
\n");
#endif
/* first and last entries of AlignPair.indexprime */
fprintf(Fptxt," Indices: %d--%d",
AlignPair.indexprime[1],
AlignPair.indexprime[AlignPair.length]);
fprintf(Fptxt," Score: %d",Maxscore);
fprintf(Fptxt,"\n Period size: %d Copynumber: %3.1f Consensus size: %d\n\n",
Period,Copynumber,consensuslength);
}
#if WEIGHTCONSENSUS
/*******************************************************************/
/**************************** get_consensus() ********************/
/*******************************************************************/
/*
 * get_consensus (variant compiled when WEIGHTCONSENSUS is non-zero):
 * tally the letters of AlignPair.textprime per pattern position, choose a
 * consensus character for each slot, and leave the compressed result in
 * Consensus.pattern with its length in ConsClasslength.
 *
 * The Consensus arrays are indexed by doubled pattern positions: for a
 * pattern index p taken from AlignPair.indexsecnd, slot 2*p+1 counts the
 * letters aligned to p, and slot 2*p counts letters of an insert run
 * (consecutive alignment columns that repeat the same indexsecnd value).
 *
 * patternsize: used to detect the last pattern position (patternsize-1),
 *              after which the insert-slot occurrence count wraps to
 *              Consensus.total[0].
 */
void get_consensus(int patternsize)
{
/* NOTE(review): newinsert is declared but never used in this function. */
int c,lastindex,j,i,max,letters,newinsert,insertA,insertC,insertG,insertT;
char maxchar;
/* initialize counts */
for (c=0;c<=2*(MAXPATTERNSIZE);c++)
{
Consensus.A[c]=0;
Consensus.C[c]=0;
Consensus.G[c]=0;
Consensus.T[c]=0;
Consensus.dash[c]=0;
Consensus.insert[c]=0; /* times insert occurs */
Consensus.letters[c]=0; /* number of letters in all inserts */
Consensus.total[c]=0; /* occurrences of a position */
Consensus.pattern[c]=DASH;
}
/* start consensus */
lastindex=-1;
i=1;
while (i<=AlignPair.length)
{
if (AlignPair.indexsecnd[i]!=lastindex)
{
/* new pattern position: count the aligned letter in the odd slot */
switch(AlignPair.textprime[i]){
case 'A':
Consensus.A[2*AlignPair.indexsecnd[i]+1]++;
break;
case 'C':
Consensus.C[2*AlignPair.indexsecnd[i]+1]++;
break;
case 'G':
Consensus.G[2*AlignPair.indexsecnd[i]+1]++;
break;
case 'T':
Consensus.T[2*AlignPair.indexsecnd[i]+1]++;
break;
case DASH:
Consensus.dash[2*AlignPair.indexsecnd[i]+1]++;
break;
}
Consensus.total[2*AlignPair.indexsecnd[i]+1]++;
/* except for the very first column, also count an occurrence of the */
/* insert slot that follows this position (wrapping to slot 0 after */
/* the last pattern position) */
if (lastindex!=-1)
{
if (AlignPair.indexsecnd[i]==patternsize-1)
{
Consensus.total[0]++;
}
else
{
Consensus.total[2*AlignPair.indexsecnd[i]+2]++;
}
}
lastindex=AlignPair.indexsecnd[i];
i++;
}
else /* AlignPair.indexsecnd[i]==lastindex */
{
/* insert run: the per-letter flags make each letter count at most */
/* once per run */
Consensus.insert[2*AlignPair.indexsecnd[i]]++;
insertA=0;
insertC=0;
insertG=0;
insertT=0;
/* NOTE(review): AlignPair.indexsecnd[i] is read before i is checked */
/* against AlignPair.length, so the last evaluation reads index */
/* AlignPair.length+1 -- confirm the array has room for that. */
while((AlignPair.indexsecnd[i]==lastindex)
&&
(i<=AlignPair.length))
{
/* NOTE(review): each break sits inside its if, so a letter already */
/* seen in this run falls through to the next case and may be counted */
/* as a different letter (a repeated 'A' reaches the 'C' case, and so */
/* on) -- confirm whether that is intended. */
switch(AlignPair.textprime[i]){
case 'A':
if (insertA==0)
{
Consensus.A[2*AlignPair.indexsecnd[i]]++;
insertA=1;
break;
}
case 'C':
if (insertC==0)
{
Consensus.C[2*AlignPair.indexsecnd[i]]++;
insertC=1;
break;
}
case 'G':
if (insertG==0)
{
Consensus.G[2*AlignPair.indexsecnd[i]]++;
insertG=1;
break;
}
case 'T':
if (insertT==0)
{
Consensus.T[2*AlignPair.indexsecnd[i]]++;
insertT=1;
break;
}
}
Consensus.letters[2*AlignPair.indexsecnd[i]]++;
i++;
}
}
}
/* get consensus for letters that exist */
/* NOTE(review): the loops below use patternlength, which is neither a */
/* parameter nor a local of this function (the parameter is named */
/* patternsize) -- confirm which name is intended. */
for(i=1;i<=2*(patternlength);i+=2)
{
letters=Consensus.total[i]-Consensus.dash[i];
max=Consensus.A[i];maxchar='A';
/* NOTE(review): the condition below is an assignment, not a comparison, */
/* and maxchar can only be 'A' at this point; the comparisons against */
/* the C, G and T counts appear to be missing -- restore before use. */
if(max=(letters*Delta))
Consensus.pattern[i]=maxchar;
else Consensus.pattern[i]=DASH;
}
/* get consensus for inserted letters */
for(i=0;i<=2*(patternlength);i+=2)
{
if (Consensus.total[i]!=0)
{
max=Consensus.A[i];maxchar='A';
/* NOTE(review): the condition below is incomplete (a variable followed */
/* directly by a parenthesised expression); part of the text appears */
/* to be missing -- restore before use. */
if(max
(Consensus.letters[i]*Delta))
Consensus.pattern[i]=maxchar;
else Consensus.pattern[i]=DASH;
}
}
/* compress consensus */
/* slide the non-DASH characters to the front; j ends as their count */
j=0;
for(i=0;i<=2*patternlength;i++)
if (Consensus.pattern[i]!=DASH)
{
Consensus.pattern[j]=Consensus.pattern[i];
j++;
}
ConsClasslength=j;
}
#else
/*******************************************************************/
/**************************** get_consensus() ********************/
/*******************************************************************/
/*
 * get_consensus (variant compiled when WEIGHTCONSENSUS is zero): tally the
 * letters of AlignPair.textprime per pattern position into the Consensus
 * count arrays.
 *
 * Indexing: for a pattern index p taken from AlignPair.indexsecnd, slot
 * 2*p+1 counts the letters aligned to p and slot 2*p counts letters of an
 * insert run (consecutive columns repeating the same indexsecnd value).
 *
 * patternlength: used to detect the last pattern position and as the
 *                bound of the per-position loop.
 *
 * NOTE(review): this span is damaged. The end of this function, the
 * closing of the conditional-compilation block and the header of the next
 * function are missing; what follows the per-position loop is the tail of
 * a different function. Restore from the upstream source before building.
 */
void get_consensus(int patternlength)
{
int c,lastindex,j,i,max;
char maxchar;
/* initialize counts */
for (c=0;c<=2*(MAXPATTERNSIZE);c++)
{
Consensus.A[c]=0;
Consensus.C[c]=0;
Consensus.G[c]=0;
Consensus.T[c]=0;
Consensus.dash[c]=0;
Consensus.insert[c]=0;
Consensus.total[c]=0;
Consensus.pattern[c]=DASH;
}
/* start consensus */
lastindex=-1;
i=1;
while (i<=AlignPair.length)
{
if (AlignPair.indexsecnd[i]!=lastindex)
{
/* new pattern position: count the aligned letter in the odd slot */
switch(AlignPair.textprime[i]){
case 'A':
Consensus.A[2*AlignPair.indexsecnd[i]+1]++;
break;
case 'C':
Consensus.C[2*AlignPair.indexsecnd[i]+1]++;
break;
case 'G':
Consensus.G[2*AlignPair.indexsecnd[i]+1]++;
break;
case 'T':
Consensus.T[2*AlignPair.indexsecnd[i]+1]++;
break;
case DASH:
Consensus.dash[2*AlignPair.indexsecnd[i]+1]++;
break;
}
/* except for the very first column, count an occurrence of the insert */
/* slot that follows this position (wrapping to slot 0 after the last */
/* pattern position) */
if (lastindex!=-1)
{
if (AlignPair.indexsecnd[i]==patternlength-1)
{
Consensus.total[0]++;
}
else
{
Consensus.total[2*AlignPair.indexsecnd[i]+2]++;
}
}
lastindex=AlignPair.indexsecnd[i];
i++;
}
else /* AlignPair.indexsecnd[i]==lastindex */
{
Consensus.insert[2*AlignPair.indexsecnd[i]]++;
/* NOTE(review): AlignPair.indexsecnd[i] is read before i is checked */
/* against AlignPair.length, so the last evaluation reads index */
/* AlignPair.length+1 -- confirm the array has room for that. */
while((AlignPair.indexsecnd[i]==lastindex)
&&
(i<=AlignPair.length))
{
/* every letter of the insert run is counted in the even slot */
switch(AlignPair.textprime[i]){
case 'A':
Consensus.A[2*AlignPair.indexsecnd[i]]++;
break;
case 'C':
Consensus.C[2*AlignPair.indexsecnd[i]]++;
break;
case 'G':
Consensus.G[2*AlignPair.indexsecnd[i]]++;
break;
case 'T':
Consensus.T[2*AlignPair.indexsecnd[i]]++;
break;
case DASH:
Consensus.dash[2*AlignPair.indexsecnd[i]]++;
break;
}
i++;
}
}
}
/* NOTE(review): the text is damaged from this loop onward: the two */
/* conditions below have unbalanced parentheses and missing operands. */
for(i=1;i<=2*(patternlength);i+=2)
{
max=Consensus.dash[i];
maxchar=DASH;
if(max=0.5))
{
max=Consensus.A[i];maxchar='A';
if(maxhighindex);
/* NOTE(review): the statements below use list, location, size and */
/* windowsize, none of which are declared in get_consensus; they appear */
/* to belong to a different function whose header is missing here. */
lo=&(list->lowindex);
z=&(list->numentries);
m=&(list->nummatches);
if ((*z)!=0) /* there exist previous entries on this list */
{
/* NOTE(review): the second comment below is never closed in this text */
/* and the loop header that followed it is missing, so everything up to */
/* the next comment terminator is commented out as written. */
/* first remove trailing entries, i.e., */
/* those with locationentry[*lo].location0))
{
(*z)--;
(*m)-=list->entry[*lo].size;
(*lo)++;
(*lo)%=windowsize;
}
}
if(((*z)!=0)&&(list->entry[*hi].location==location-1))
/* there are still more entries and this is an adjacent tuple, */
/* just add on to last entry */
{
list->entry[*hi].location++;
list->entry[*hi].size++;
(*m)++;
}
else /* need a new entry here */
{
/* advance the high index circularly and store the new run */
(*z)++;
(*hi)++;
(*hi)%=windowsize;
list->entry[*hi].location=location;
list->entry[*hi].size=size;
(*m)+=size;
}
}
/*
 * link_Distance_window: insert distance d into the doubly linked list
 * formed by the linkup/linkdown fields of the global Distance array, and
 * mark Distance[d] as linked.
 *
 * The insertion point f (the linked index below d) is found by starting
 * from a Tag[] entry and following linkdown links.
 *
 * d: index into Distance to link in.
 *
 * NOTE(review): the first branch condition is damaged in this text (two
 * comparisons appear to have been fused), so the tag-update path is
 * missing. As written, f is assigned on only one visible path before it is
 * used below -- restore from the upstream source.
 */
void link_Distance_window(int d)
{
int t,f,h;
/* get next highest tag */
/* NOTE(review): if TAGSEP is an integer constant, d/TAGSEP truncates */
/* before ceil() is applied, so this is a floor -- confirm intent. */
t=(int)ceil(d/TAGSEP);
if(Tag[t]d) /* follow links to insert d */
{
f=Tag[t];
while(f>d) f=Distance[f].linkdown;
/* f is first linked index below d */
/* NOTE(review): this error is only reported; unlike the Tag error */
/* below, execution continues and d is linked after itself. */
if (f==d)trf_message("\nTag error following links. f==d=%d",d);
}
else if(Tag[t]==d)
{
trf_message("\nTag error Tag[%d]=%d",t,d);
exit(-2);
}
/* link in d */
/* splice d between f (below) and f's former upward neighbour h */
Distance[d].linkdown=f;
h=Distance[f].linkup;
Distance[d].linkup=h;
Distance[f].linkup=d;
/* h is only written back when it is a valid index (h<=MAXDISTANCE) */
if(h<=MAXDISTANCE) Distance[h].linkdown=d;
Distance[d].linked=TRUE;
}
void untag_Distance_window(int d, int linkdown)
{
int t;
/* get next highest tag */
t=(int)ceil(d/TAGSEP);
if(Tag[t]!=d) return; /* Tag[t] is the largest index less or */
/* equal to (t)x(TAGSEP) that is linked */
else
{
while((t<=Toptag)&&(Tag[t]==d)) /* check higer tags and replace d */
/* with its linkdown */
{
Tag[t]=linkdown;
t++;
}
}
}
/*
 * no_matches_so_unlink_Distance: if the entry list for distance d holds no
 * entries, unlink d from the Distance linked list and clear its tag.
 *
 * d:        distance whose entry list is examined
 * location: not referenced in the text that survives here; presumably
 *           used by the missing pruning-loop condition -- TODO confirm
 * objptr:   array of distance lists, indexed by d
 *
 * Returns 1 when d was unlinked, 0 otherwise.
 *
 * NOTE(review): the unlink step writes the global Distance array rather
 * than objptr; the callers visible in this file pass Distance as objptr.
 * windowleftend is declared but not referenced in the surviving text.
 */
int no_matches_so_unlink_Distance(int d, int location,
struct distancelist *objptr)
{
int *lo, *hi, g, h;
int *z, *m;
int windowleftend, windowsize;
struct distancelist *list;
windowsize=max(d,Min_Distance_Entries)+1; /* this value is used to */
/* mod the index to the */
/* entries. The entries */
/* run from 0 to */
/* max(d,Min_Distance_Entries) */
list=&(objptr[d]);
hi=&(list->highindex);
lo=&(list->lowindex);
z=&(list->numentries);
m=&(list->nummatches);
if ((*z)!=0) /* there exist previous entries on this list */
{
/* NOTE(review): the second comment below is never closed in this text */
/* and the loop header that followed it is missing, so the pruning loop */
/* and the zero-entries test are commented out as written -- restore */
/* from the upstream source. */
/* first remove trailing entries, i.e., */
/* those with locationentry[*lo].location0))
{
(*z)--;
(*m)-=list->entry[*lo].size;
(*lo)++;
(*lo)%=windowsize;
if((*lo)>windowsize)trf_message("\n no_matches; *lo:%d windowsize:%d",*lo,windowsize);
}
}
if((*z)==0) /* no more matches, so unlink */
{
/* given a distance d with zero matches, unlink from list */
g=Distance[d].linkdown;
h=Distance[d].linkup;
Distance[g].linkup=h;
if (h<=MAXDISTANCE) Distance[h].linkdown=g;
Distance[d].linked=FALSE;
/* once unlinked, we must also untag if it is a tag */
untag_Distance_window(d,g);
return(1);
}
return(0);
}
/*
 * GetTopPeriods: estimate the most frequent periods in pattern.
 *
 * For every position, the 2-letter tuple starting there is mapped to an id
 * through the global Index table, and the distances back to earlier
 * occurrences of the same tuple are tallied in counts[]. A least-squares
 * line is then fitted to counts[] and its slope subtracted, and the
 * largest remaining values are copied to toparray.
 *
 * pattern:  start of the text to scan
 * length:   number of characters in pattern
 * toparray: receives the selected distances, one per slot
 *
 * Returns 0 on success, 1 if either work array cannot be allocated.
 *
 * NOTE(review): heads[] has 16 slots, so Index[] is assumed to yield
 * 0..3 for every character in pattern -- confirm. pattern[i] is a plain
 * char used as an array index; presumably input is 7-bit -- confirm.
 */
int GetTopPeriods(char* pattern, int length, int* toparray)
{
int topind;
double topval;
int heads[16];
int *history;
double* counts;
int i,t,end,tupid;
int curr,dist;
double n,xysum,xsum,ysum,x2sum,s;
/* allocate an array of counts */
counts = (double*) calloc(length,sizeof(double));
if(counts==NULL) return 1;
/* allocate history array */
history = (int*) malloc(length*sizeof(int));
if(history==NULL)
{
free(counts);
return 1;
}
/* clear the heads array which point into history array */
for(i=0;i<16;i++) heads[i]=-1;
/* scan pattern for tuples of size 2 */
for(i=0,end=length-2;i<=end;i++)
{
/* figure out tuple id */
tupid = Index[pattern[i]]*4+Index[pattern[i+1]];
/* record last occurrence into history and update heads[] pointer */
history[i] = heads[tupid];
heads[tupid]=i;
/* loop into history and add distances */
/* 11/17/15 G. Benson */
/* limit maximum length of distance recorded between tuples to MAXDISTANCECONSTANT*3 = 6,000*/
/* this should be long enough to deter finding periods that are not the most frequent */
/* Without this change, this procedure is quadratic in the length, which could be several million */
/* and caused the program to hang with long centromeric repeats */
/* for(curr=i;history[curr]!=-1;curr=history[curr])*/
dist = 0;
for(curr=i;((history[curr]!=-1)&&(dist<(MAXDISTANCECONSTANT*3)));curr=history[curr])
{
dist = i-history[curr];
counts[dist]+=1.0;
}
}
/* compute slope using least-square regression */
xysum=xsum=ysum=x2sum=0.0;
end = length-2;
for(i=1;i<=end;i++)
{
xysum += (i*counts[i]);
xsum += (i);
ysum += (counts[i]);
x2sum += (i*i);
}
n = end;
/* NOTE(review): the denominator is zero when end is 0 or 1 (length of */
/* 2 or 3), which makes s non-finite -- confirm callers never pass such */
/* short input. */
s = (n*xysum-xsum*ysum)/(n*x2sum-xsum*xsum);
/* flatten trend by adding -s per increment */
end = length-2;
for(i=1;i<=end;i++)
{
counts[i] = counts[i] - i*s;
}
/* pick highest values */
end = length-2;
if(end>MAXDISTANCE) end = MAXDISTANCE; /* 3/14/05 accepts smaller multiples if best ones are too large */
/* NOTE(review): the loop headers on the next line are damaged (the */
/* bounds of the t loop, the initialisation of topind/topval and the */
/* inner scan over counts[] are missing) -- restore from upstream. */
for(t=0;ttopval)
{
topind=i;
topval = counts[i];
}
}
/* copy to array passed as parameter */
toparray[t] = topind;
/* zero the chosen slot so the next pass picks a different distance */
counts[topind]=0.0;
}
/* free memory */
free(counts);
free(history);
return 0;
}
/*
 * multiples_criteria_4: check a found period against the text covered by
 * the current alignment (AlignPair).
 *
 * found_d: the period being checked. When it is 1, the result depends only
 *          on letter composition: TRUE if the most common letter reaches
 *          80 percent of the counted total, FALSE otherwise. For other
 *          values GetTopPeriods is run on the aligned text; the program
 *          exits with -1 if that call reports an allocation failure.
 *
 * NOTE(review): this span is damaged. The composition loop is fused into
 * one line, the rest of this function is missing, and the text continues
 * with the body of a different function whose header and declarations
 * are not present. Restore from the upstream source before building.
 */
int multiples_criteria_4(int found_d)
{
int g,lowerindex,upperindex;
int topperiods[NUMBER_OF_PERIODS];
char* pattern;
int length;
/* NOTE(review): the lower bound comes from the last indexprime entry */
/* and the upper bound from the first; presumably AlignPair is stored */
/* in reverse order at this point -- confirm against the caller. */
lowerindex=AlignPair.indexprime[AlignPair.length];
upperindex=AlignPair.indexprime[1];
pattern = Sequence+lowerindex;
length = upperindex-lowerindex+1;
/* size one is verified based on composition */
/* only returns true if > 80% comp for any nucleotide */
if(found_d==1)
{
int comps[4]={0};
int total=0;
int maxind=0;
float percentmatch;
/* NOTE(review): the next line is damaged; the loops that fill comps[] */
/* and total and then select maxind have been fused together. As */
/* written total stays 0, so the division below would divide by zero. */
for(g=0;gcomps[maxind]) maxind=g;
}
percentmatch = comps[maxind]*100.0f/total;
if(percentmatch>=80.0f) return TRUE;
else return FALSE;
}
if(GetTopPeriods(pattern, length, topperiods)) /* gettopperiods returns zero on success */
{
fprintf(stderr,"\nUnable to allocate counts array in GetTopPeriods()!!!");
exit(-1);
}
/* NOTE(review): the comment opened two lines below is never closed in */
/* this text, so everything up to the next comment terminator is */
/* commented out as written. */
/* copy into global for compatibility with best_period_list */
/*
for(g=0;gk_run_sums_criteria;
max_first_match_location
= max(0,location - max(d,Min_Distance_Window))
+ main_d_info->waiting_time_criteria;
low_end_of_range=main_d_info->lo_d_range;
high_end_of_range=main_d_info->hi_d_range;
/* get number of matches in d */
/* NOTE(review): from here to the closing brace the code uses d, */
/* location, tuplesize, main_d_info and other names that are not */
/* declared in multiples_criteria_4; it appears to be the body of a */
/* different function whose header and declarations are missing. */
main_d_matches=main_d_info->nummatches;
/* set up a test for checking waiting time on range distances */
/* criteria is that they must have at least 35% of the krun matches */
range_d_min_for_waiting_time_test=(int)(0.35*min(min_krun_matches,main_d_matches));
/* calculate the location of the first match for d */
main_d_first_match_location
= main_d_info->entry[main_d_info->lowindex].location /* end of first run of matches */
- main_d_info->entry[main_d_info->lowindex].size /* minus size of run */
+ tuplesize; /* plus tuplesize */
/* test waiting time criteria for d */
waiting_time_ok=0;
waiting_time_d=0;
if(main_d_first_match_location<=max_first_match_location)
{
waiting_time_ok=1;
waiting_time_d=d;
}
/* does d meet all criteria? */
if((main_d_matches>=min_krun_matches)&&(waiting_time_ok))
{
return(TRUE);
}
/* no, so now look in the low range to see if d has most matches */
/* accumulate matches in low range */
/* test for waiting time if main d didn't pass test */
hipointer=d;
lopointer=d;
t=main_d_info->linkdown;
m=main_d_matches;
d_still_best=TRUE;
while((t>=low_end_of_range) && (d_still_best))
{
range_d_info=&(Distance[t]);
/* save the next link first: the call below may unlink t */
s=range_d_info->linkdown;
if(!no_matches_so_unlink_Distance(t,location,Distance))
{
if((range_d_matches=range_d_info->nummatches)>main_d_matches)
d_still_best=FALSE;
else
{
lopointer=t;
m+=range_d_info->nummatches;
if((!waiting_time_ok)&&(range_d_matches>=range_d_min_for_waiting_time_test))
{
/* calculate the location of the first match for range d */
range_d_first_match_location
= range_d_info->entry[range_d_info->lowindex].location /* end of first run of matches */
- range_d_info->entry[range_d_info->lowindex].size /* minus size of run */
+ tuplesize; /* plus tuplesize */
/* do waiting time test; if successful, store t*/
if(range_d_first_match_location<=max_first_match_location)
{
waiting_time_ok=1;
waiting_time_d=t;
}
}
}
}
t=s;
}
/* stop if d not best */
if(!d_still_best)
return(FALSE);
/* now check that d is best in upper range */
t=main_d_info->linkup;
while((t<=high_end_of_range)&&(d_still_best))
{
range_d_info=&(Distance[t]);
/* save the next link first: the call below may unlink t */
s=range_d_info->linkup;
if(!no_matches_so_unlink_Distance(t,location,Distance))
{
if(range_d_info->nummatches>main_d_matches)
d_still_best=FALSE;
}
t=s;
}
/* stop if d not best */
if(!d_still_best)
return(FALSE);
/* now test lowest range */
if((m>=min_krun_matches)&&(waiting_time_ok))
{
return(TRUE);
}
/* lower range didn't work, now test higher ranges */
d_range=d-low_end_of_range+1;
t=main_d_info->linkup;
while(t<=high_end_of_range)
{
range_d_info=&(Distance[t]);
s=range_d_info->linkup;
hipointer=t;
range_d_matches=range_d_info->nummatches;
m+=range_d_matches;
/* NOTE(review): the next line is damaged; the loop that advances */
/* lopointer and the following waiting-time condition have been fused */
/* together -- restore from upstream. */
while(lopointer=range_d_min_for_waiting_time_test)
{
/* calculate the location of the first match for range d */
range_d_first_match_location
= range_d_info->entry[range_d_info->lowindex].location /* end of first run of matches */
- range_d_info->entry[range_d_info->lowindex].size /* minus size of run */
+ tuplesize; /* plus tuplesize */
/* do waiting time test; if successful, store t*/
if(range_d_first_match_location<=max_first_match_location)
{
waiting_time_ok=1;
waiting_time_d=t;
}
}
/* now test range */
if((m>=min_krun_matches)&&(waiting_time_ok))
{
return(TRUE);
}
t=s;
}
return(FALSE);
}
/****************************************************/
/******************** flanking sequence *********************/
/*
 * print_sequence_lines: write Sequence[first..last] to Fptxt, starting a
 * new output line after every linewidth characters and after the final
 * character.
 *
 * The character at first is always written, even when first is greater
 * than last; this matches what the callers' original loops did.
 *
 * A linewidth below 1 is treated as 1. Without that clamp the position
 * would never advance and the loop would write newlines forever.
 */
static void print_sequence_lines(int first, int last, int linewidth)
{
  int pos=first;
  int col;

  if (linewidth<1) linewidth=1;

  do
  {
    for(col=0;col<linewidth;col++)
    {
      fputc(Sequence[pos],Fptxt);
      pos++;
      if(pos>last) break;
    }
    fprintf(Fptxt,"\n");
  } while(pos<=last);
}

/*
 * print_flanking_sequence: write the sequence on either side of the
 * current alignment to Fptxt.
 *
 * The left flank covers up to flank_length characters before
 * AlignPair.indexprime[1], clipped at index 1. The right flank covers up
 * to flank_length characters after AlignPair.indexprime[AlignPair.length],
 * clipped at Length. A flank with no characters is reported as "None".
 * Flank text is wrapped at pwidth-10 characters per line.
 *
 * flank_length: number of characters requested on each side.
 *
 * Fix over the previous version: when pwidth-10 was zero or negative the
 * wrapping loops never advanced and never terminated; the line width is
 * now clamped to at least one character.
 */
void print_flanking_sequence(int flank_length)
{
  int repeat_start=AlignPair.indexprime[1];
  int repeat_end=AlignPair.indexprime[AlignPair.length];
  int left_start;
  int right_end;

  /* clip both flanks to the valid index range 1..Length */
  left_start=repeat_start-flank_length;
  if (left_start<1) left_start=1;
  right_end=repeat_end+flank_length;
  if (right_end>Length) right_end=Length;

  if (left_start==repeat_start)
  {
    fprintf(Fptxt,"\nLeft flanking sequence: None");
  }
  else
  {
    fprintf(Fptxt,"\nLeft flanking sequence: Indices %d -- %d\n",
            left_start,repeat_start-1);
    print_sequence_lines(left_start,repeat_start-1,pwidth-10);
  }

  if (right_end==repeat_end)
  {
    fprintf(Fptxt,"\n\nRight flanking sequence: None");
  }
  else
  {
    fprintf(Fptxt,"\n\nRight flanking sequence: Indices %d -- %d\n",
            repeat_end+1,right_end);
    print_sequence_lines(repeat_end+1,right_end,pwidth-10);
  }

  fprintf(Fptxt,"\n\n");
  return;
}
void printECtoAlignments(FILE* fp, int start, int width)
{
int counter;
int chars=0;
fprintf(Fptxt,"\nConsensus pattern (%d bp): ",width);
for(counter=start;counterAlignPair.length)
{
trf_message("\nError in statistics.");
trf_message("\nInitial left pointer exceeds AlignPair.length while");
trf_message("\nlooking for first non -");
exit(-10);
}
}
rp=lp+1;
if(rp>AlignPair.length)
{
trf_message("\nError in statistics.");
trf_message("\nInitial right pointer exceeds AlignPair.length");
exit(-10);
}
while(AlignPair.indexsecnd[rp]!=AlignPair.indexsecnd[lp])
{
rp++;
if(rp>AlignPair.length)
{
trf_message("\nError in statistics.");
trf_message("\nInitial right pointer exceeds AlignPair.length while");
trf_message("\nlooking for AlignPair.indexsecnd[lp]");
exit(-10);
}
}
while(AlignPair.textsecnd[rp]=='-')
{
rp++;
if(rp>AlignPair.length)
{
trf_message("\nError in statistics.");
trf_message("\nInitial right pointer exceeds AlignPair.length while");
trf_message("\nlooking for first non -");
exit(-10);
}
}
if(AlignPair.indexsecnd[lp]!=AlignPair.indexsecnd[rp])
{
trf_message("\nError in statistics.");
trf_message("\nInitial left pointer index not the same as");
trf_message("\ninitial right pointer index");
exit(-10);
}
/* now do the tests */
mindistance=size;
maxdistance=size;
while((rp<=AlignPair.length)&&(lp