/* This is a SIGNED version - Works OK! on ALL data? Last Checked 15/12/2004*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

void jacobi(double **, int, double *, double **, int *);
void eigsrt(double *, double **, int);

int main(int argc, char *argv[]){
  FILE *fp, *fpPCA;
  int row, col, ban, hea, half, sum, d, pixelsize, nul, noc, nrot, n;
  int *mini, *maxi, *histo, *med, *shisto, *pixelcount;
  unsigned char *p;
  short int *pp, xx, *eb;
  float fs;
  double *s, *ss, *sxy, *entropy, **a, *eval, **evec, seval, speval;
  register int r, b, c, bb, i, j;

  if(argc!=9){
    puts(" * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
    puts(" *                                                                       *");
    puts(" * PCA_8_16:   Calculate PCA on BIL datasets.                            *");
    puts(" *                                                                       *");
    puts(" *         -   Input parameters (at command line) are:                   *");
    puts(" *                 -        Dataset filename                             *");
    puts(" *                 -        Rows                                         *");
    puts(" *                 -        Columns                                      *");
    puts(" *                 -        Bands                                        *");
    puts(" *                 -        Header size                                  *");
    puts(" *                 -        Null cell value [-1 = none]                  *");
    puts(" *                 -        Number of components [-1 = max, 0 = none ]   *");
    puts(" *                 -        PC dataset filename (LSB)                    *");
    puts(" *         -   Output parameters (on output) for each band are:          *");
    puts(" *                 -        Active cell count                            *");
    puts(" *                 -        Minimum                                      *");
    puts(" *                 -        Maximum                                      *");
    puts(" *                 -        Average                                      *");
    puts(" *                 -        Median                                       *");
    puts(" *                 -        Entropy                                      *");
    puts(" *                 -        Std. Deviation                               *");
    puts(" *                 -        Variance-covariance matrix                   *");
    puts(" *                 -        Correlation matrix                           *");
    puts(" *                 -        Eigenvalues 				                         *");
    puts(" *                 -        Eigenvectors 				                         *");
    puts(" *         -   Works on 8-16 bit of signed integers                      *");
    puts(" *                                                                       *");
    puts(" * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
    puts(" *                                                                       *");
    puts(" * PCA_8_16    by Christos Iossifides (c) January 2003-04 NaTUReS Lab.   *");
    puts(" *                                                                       *");
    puts(" * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
    system("PAUSE");
    exit(1);
    }
  if((fp=fopen(argv[1],"rb"))==NULL){
    printf("PCA_8_16 reports: Bad filename <%s>.\n",argv[1]);
    exit(1);
    }
  row=atoi(argv[2]);
  col=atoi(argv[3]);
  ban=atoi(argv[4]);
  hea=atoi(argv[5]);
  nul=atoi(argv[6]);
  noc=atoi(argv[7]);
  if (row<1 ||col<1|| ban<1|| hea<0|| hea>=row*col*ban*(int)sizeof(short)){
    printf("PCA_8_16 reports: Bad data <%d %d %d %d>.\n",row,col,ban,hea);
    exit(1);
    }
  if (noc==-1||noc>0)
	  if((fpPCA=fopen(argv[8],"wb"))==NULL){
  	  printf("PCA_8_16 reports: Bad filename <%s>.\n",argv[8]);
    	exit(1);
	    }
  if (fseek(fp,0,SEEK_END)){
    puts("PCA_8_16 reports: Bad seek.");
    exit(1);
    }
  pixelsize=(ftell(fp)-hea)/row/col/ban;
  printf("\nImage <%s> has %d bits per pixel and :\n\n", argv[1], pixelsize*sizeof(char)*8);
  if (fseek(fp,0,SEEK_SET)){
    puts("PCA_8_16 reports: Bad seek.");
    exit(1);
    }
  if (pixelsize<1||pixelsize>2){
    printf("PCA_8_16 reports: Bad pixel size <%d %d %d %d -> %d>.\n",row,col,ban,hea,pixelsize);
    exit(1);
    }
  if(hea&&fseek(fp,hea,SEEK_SET)){
    puts("PCA_8_16 reports: Bad seek.");
    exit(1);
    }
  if((pp=(short int *)p=(unsigned char *)malloc(col*ban*pixelsize))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((mini=malloc(ban*sizeof(int)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((maxi=calloc(ban,sizeof(int)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((med=malloc(ban*sizeof(int)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((eb=malloc(ban*sizeof(short int)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((pixelcount=calloc(ban,sizeof(int)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((s=calloc(ban,sizeof(double)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((ss=calloc(ban,sizeof(double)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((sxy=calloc(ban*ban,sizeof(double)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((entropy=calloc(ban,sizeof(double)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  if((histo=calloc(ban*65535,sizeof(int)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
  for (b=0;b<ban;b++)
  	eb[b]=b+1;
  for (b=0;b<ban;b++){
    mini[b]=65536;
    maxi[b]=-32769;
  }
  for (r=0;r<row;r++){
    if(fread(p,pixelsize,col*ban,fp)!=(size_t)col*ban){
      puts("PCA_8_16 reports: EOF encountered.");
      exit(1);
      }
    for (b=0;b<ban;b++)
      for (c=0;c<col;c++){
        d=(int)(pixelsize-1)?pp[b*col+c]:p[b*col+c];
        if (nul<0||nul!=d){
          pixelcount[b]++;
          s[b]+=d;
          ss[b]+=d*d;
          histo[b*65535+(d+32768)]++;
          if(mini[b]>(int)d)
            mini[b]=(int)d;
          if(maxi[b]<(int)d)
            maxi[b]=(int)d;
          }    
        }
      for (c=0;c<col;c++)
        for (b=0;b<ban;b++)
          for (bb=0;bb<ban;bb++)
		    		sxy[b*ban+bb]+=((int)(pixelsize-1)?pp[b*col+c]:p[b*col+c])*((int)(pixelsize-1)?pp[bb*col+c]:p[bb*col+c]);
fprintf(stderr,"PASS 1/2: Calculate statistics at line %d of %d [%2.0f%].          \r", r+1,row,(float)r/row*100);		    		
    }
  rewind(fp);  
  for (b=0;b<ban;b++){
    sum=0;
    if (mini[b]!=maxi[b])
		  for (c=mini[b];c<=maxi[b];c++){
        sum+=histo[b*65535+(c+32768)];
   	    if (sum<pixelcount[b]/2)
     	    med[b]=c+32768;
       	seval=(double)sum/pixelcount[b];
	      entropy[b]+=-seval*log(seval)/0.69314718055994530941723212145818;//log(2.0)
        }
    else
    	eb[b]=0;
    }
    
  printf("\n Band | Act. Cells | Minimum | Maximum |   Average  |  Median |  Entropy   |   Std. Deviation");
  printf("\n------+------------+---------+---------+------------+---------+------------+------------------");
  for (b=0;b<ban;b++)
    if (eb[b])
	  printf("\n %4d | %10d | %7d | %7d | %10.3lf | %7d | %10.4lf |  %14.8lf",
               b+1, pixelcount[b], mini[b], maxi[b], s[b]/pixelcount[b], med[b], entropy[b], sqrt(ss[b]/pixelcount[b]-s[b]*s[b]/pixelcount[b]/pixelcount[b]));
    else
	  printf("\n %4d | %10d | %7d | %7d | ---------- | ------- | ---------- |  --------------",
               b+1, pixelcount[b], mini[b], maxi[b]);
  printf("\n\n Variance-covariance Matrix [%d]x[%d] : \n", ban,ban);
  for (b=0;b<ban;b++){
	  for (bb=0;bb<=b;bb++)
      if (eb[b]&&eb[bb])
			  printf(" %15.8lf",(sxy[b*ban+bb]=((sxy[b*ban+bb]-(s[b]*s[bb])/pixelcount[b])/(pixelcount[b]-1))));
			else
			  printf(" -------------- ");
		printf("\n");
	  }

  printf("\n\n Correlation Matrix [%d]x[%d] : \n", ban,ban);

  n=0;
  for (b=0;b<ban;b++){
	  for (bb=0;bb<=b;bb++)
      if (eb[b]&&eb[bb])
			  printf(" %7.4lf",sxy[b*ban+bb]/(sqrt(ss[b]/pixelcount[b]-s[b]*s[b]/pixelcount[b]/pixelcount[b])*sqrt(ss[bb]/pixelcount[bb]-s[bb]*s[bb]/pixelcount[bb]/pixelcount[bb]) ) );
			else
			  printf(" ------ ");
    if (eb[b])
      n++;
		printf("\n");
	  }
///////////////////////////
  for (b=0;b<ban;b++)
	  printf("%4d",eb[b]);
	printf("\n");

  for (bb=b=0;b<ban;b++)
    if (eb[b])
    	eb[b]=++bb;

  for (b=0;b<ban;b++)
	  printf("%4d",eb[b]);
	printf("\n");
///////////////////////////

	if ((a=(double **)malloc(n*sizeof(double *)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
	if ((evec=(double **)malloc(n*sizeof(double *)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
	if ((eval=(double *)malloc(n*sizeof(double)))==NULL){
    puts("PCA_8_16 reports: Bad memory allocation.");
    exit(1);
    }
	for (i=0;i<n;i++){
		if ((a[i]=(double *)malloc(n*sizeof(double)))==NULL){
	    puts("PCA_8_16 reports: Bad memory allocation.");
  	  exit(1);
    	}
		if ((evec[i]=(double *)malloc(n*sizeof(double)))==NULL){
	    puts("PCA_8_16 reports: Bad memory allocation.");
  	  exit(1);
    	}
		}

  for (i=b=0;b<ban;b++)
    if (eb[b]){
		  for (j=bb=0;bb<=b;bb++)
  	    if (eb[b]){
				  a[i][j]=sxy[b*ban+bb];
				  j++;
				}
			i++;
		}
	for (i=0;i<n;i++)
		for (j=i+1;j<n;j++)
			a[i][j]=a[j][i];

	jacobi(a, n, eval, evec, &nrot);
	eigsrt(eval, evec, n);
	printf("\nNumber of rotations on Jacobi algorithm is : %d.\n", nrot);

	printf("\nEigenvalues : \n");
	seval=0.0;
	for (j=0;j<n;j++){
		printf("%16.6lf",eval[j]);
		seval+=eval[j];
	}
	printf("%16.6lf\n",seval);
	
	printf("\nPercentage Eigenvalues : \n");
	speval=0.0;
	for (j=0;j<n;j++){
		printf("%16.6lf",eval[j]*100/seval);
		speval+=eval[j]*100/seval;
	}
	printf("%16.6lf\n",speval);
	
	printf("\nCumulative Percentage Eigenvalues : \n");
	speval=0.0;
	for (j=0;j<n;j++)
		printf("%16.6lf",speval+=eval[j]*100/seval);
	printf("%16.6lf\n",speval);

	printf("\nEigenvectors : \n");
	for (i=0;i<n;i++,printf("\n"))
		for (j=0;j<n;j++)
			printf(" %15.12lf",evec[i][j]);

  if (noc==-1||(noc<=n&&noc>0)){
		if (noc==-1)
			noc=n;
	  for (r=0;r<row;r++){
  	  if(fread(p,pixelsize,col*ban,fp)!=(size_t)col*ban){
    	  puts("PCA_8_16 reports: EOF encountered.");
      	exit(1);
	      }

			for (bb=0;bb<noc;bb++){
 	  		for (c=0;c<col;c++){
					seval=0.0;
		      for (b=0;b<ban;b++)
					  if (eb[b]){
			        d=(int)(pixelsize-1)?pp[b*col+c]:p[b*col+c];
 	 	  	  	  seval+=d*evec[eb[b]-1][bb];
						}
					fs=(float)seval;
					if (fwrite(&fs,sizeof(float),1,fpPCA)!=1){
 			    	puts("PCA_8_16 reports: Disk full.");
 	  		  	exit(1);
 	    		}
				}
			}
fprintf(stderr,"PASS 2/2: Calculate Pr. components at line %d of %d [%2.0f%].            \r", r+1,row,(float)r/row*100);		    		
    }
	}
	fclose(fp);
 	if (noc)
	  fclose(fpPCA);
  printf("\n\nPCA_8_16 by Ch Iossif (c) Jan 2004 NaTUReS Lab.\n");

  return 0;
}

/*
	Computes all eigenvalues and eigenvectors of a real symmetric matrix
	a[1..n][1..n]. On output, elements of a above the diagonal are destroyed.
	d[1..n] returns the eigenvalues of a.	v[1..n][1..n] is a matrix whose
	columns contain, on output, the normalized eigenvectors of a.
	nrot returns the number of Jacobi rotations that were required.
*/

/* ROTATE is needed by jacobi routine */
#define ROTATE(a,i,j,k,l) {g=a[i][j];h=a[k][l];a[i][j]=g-s*(h+g*tau);a[k][l]=h+s*(g-h*tau);}

void jacobi(double **a, int n, double *d, double **v, int *nrot){
	int j,iq,ip,i;
	double tresh,theta,tau,t,sm,s,h,g,c,*b,*z;
	
	if ((b=(double *)malloc(n*sizeof(double)))==NULL)
		exit(1);
	if ((z=(double *)malloc(n*sizeof(double)))==NULL)
    exit(1);
	for(ip=0;ip<n;ip++){
		for(iq=0;iq<n;iq++)
			v[ip][iq]=0.0;
		v[ip][ip]=1.0;
		}
  for(ip=0;ip<n;ip++){
		d[ip]=b[ip]=a[ip][ip];
		z[ip]=0.0;
		}
  *nrot=0;
	for(i=1;i<=50;i++){
		sm=0.0;
		for(ip=0;ip<n-1;ip++){
			for(iq=ip+1;iq<n;iq++)
				sm+=fabs(a[ip][iq]);
			}
		if(fabs(sm)<1.0e-14){
			free(z);
			free(b);
			return;
			}
		if(i<4)
			tresh=0.2*sm/(n*n);
		else
			tresh=0.0;
		for(ip=0;ip<n-1;ip++){
			for(iq=ip+1;iq<n;iq++){
				g=100.0*fabs(a[ip][iq]);
				if (i>4&&(fabs(d[ip])+g)==fabs(d[ip])&&(fabs(d[iq])+g)==fabs(d[iq]))
					a[ip][iq]=0.0;
				else if(fabs(a[ip][iq])>tresh){
					h=d[iq]-d[ip];
					if((fabs(h)+g)==fabs(h))
						t=(a[ip][iq])/h;
		    	else{
						theta=0.5*h/(a[ip][iq]);
						t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
						if (theta<0.0)
							t=-t;
		    		}
		    	c=1.0/sqrt(1+t*t);
		    	s=t*c;
		    	tau=s/(1.0+c);
		    	h=t*a[ip][iq];
		    	z[ip]-=h;
		    	z[iq]+=h;
		    	d[ip]-=h;
		    	d[iq]+=h;
		    	a[ip][iq]=0.0;
		    	for(j=0;j<=ip-1;j++){
						ROTATE(a,j,ip,j,iq)
						}
					for(j=ip+1;j<=iq-1;j++){
			    	ROTATE(a,ip,j,j,iq)
			    	}
					for(j=iq+1;j<n;j++){
				    ROTATE(a,ip,j,iq,j)
			  	  }
			    for(j=0;j<n;j++){
						ROTATE(v,j,ip,j,iq)
						}
			    ++(*nrot);
					}
		    }
			}
		for(ip=0;ip<n;ip++){
			b[ip]+=z[ip];
			d[ip]=b[ip];
			z[ip]=0.0;
			}
		}
	}

/*
	Given the eigenvalues d[1..n] and eigenvectors v[1..n][1..n] as output from jacobi
	(x11.1) or tqli(x11.3), this routine sorts the eigenvalues into descending order,
	and rearranges the columns of v correspondingly. The method is straight insertion.
*/
void eigsrt(double d[],double **v,int n){
	int k,j,i;
	double p;
	
	for(i=0;i<n-1;i++){
		p=d[k=i];
		for(j=i+1;j<n;j++)
			if(d[j]>=p)
				p=d[k=j];
		if(k!=i){
			d[k]=d[i];
			d[i]=p;
			for(j=0;j<n;j++){
				p=v[j][i];
				v[j][i]=v[j][k];
				v[j][k]=p;
				}
			}
	  }
	}

