/* xlsqr_demo v1.0.0 Ver.: OK (c) 04/10 - Christos Iosifides */
/*
   This program is a demo which evaluates the best 2nd order surface 

   x[0] * px^2    + x[1] * py^2    + x[2] * pz^2   +
   x[3] * px * py + x[4] * py * pz + x[5] * px *pz + 
   x[6] * px      + x[7] * py      + x[8] * pz       = 1

   from a 3-d point (px,py,pz) coordinates.
   Uses xlsqrt to solve the least squares system Ax+Bu=w.

   Name:		xlsqr_demo
   Version:		1.0.0
   Copyright:	        Ch Iossif @ 2010
   Author:		Christos Iosifidis
   System adaptation:   Argiris Martiridis
   Evaluated by:        Argiris Martiridis and Ch Iossif
   Date:		01/04/10 12:00
   Modified:		01/04/10 12:00 - 16:30 468 lines in 4h30' !!!
                        ...
   Description:         Evaluates the best 2nd order surface from a 3-d point cloud.
   Compile:
            Linux:      gcc xlsqr_demo.c -o xlsqr_demo
            Windows:    gcc xlsqr_demo.c -o xlsqr_demo.exe
                        or similar with any ANSI C compiler

  Input:

  PN 3921 points in data.txt file
  px, py, pz coordinates in data.txt file

  NIter 99 maximum iterations
  Epsilon 1e-12 average dx

  M=9 N=3*3921 C=3921 in code
  A(CxM) in code by px py pz
  B(CxN) in code by x px py pz
  P(N) in code as unary
  W(C) in code by x px py pz

  Output:

  max |dX| on each iteration

  X(M)
  Sigma and
  Vx(MxM)

 Copyright (C) Apr 2010 Ch Iossif <chiossif@yahoo.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#define myABS(x) (((x)<0)?-(x):(x))
#define mySQR(x) ((x)*(x))

//#define FP_TYPE double
//#define EPSILON 1.e-15
//#define FP_FORMAT "%20.10le",

//#define FP_TYPE long double
//#define EPSILON 1.e-19
//#define FP_FORMAT "%24.15Le",

#define FP_TYPE __float128
#define EPSILON 1.e-34
#define FP_FORMAT "%40.31Le",(long double)

FP_TYPE xlsqr(FP_TYPE *A, FP_TYPE *B, FP_TYPE *P, FP_TYPE *W, FP_TYPE *X, FP_TYPE *U, FP_TYPE *Vx, int M, int N, int C);
FP_TYPE mySQRT(FP_TYPE);
void gpl(void);
int Matrix_inversion(FP_TYPE *,FP_TYPE *,FP_TYPE *,int);
int Gaussian_elimination(FP_TYPE *,FP_TYPE *,FP_TYPE *,int *,int);
int Cholesky_decomposition(FP_TYPE *, FP_TYPE *, int);
void print_mat(const char *, FP_TYPE *, int, int);

int main(void) {
    int m, n, c, pn, iter, niter, k;
    FP_TYPE *a, *b, *p, *w, *x, *u, *vx, *dx, sigma;
    FP_TYPE *px, *py, *pz, epsilon, s1, s2, s3;
    long double t, t1, t2, t3;
    int tmpint;
    register int i, j;

    /* data input */
    pn=100; //    pn=3921;
    k=3921/pn-1;
    px=malloc(pn*sizeof(FP_TYPE));
    py=malloc(pn*sizeof(FP_TYPE));
    pz=malloc(pn*sizeof(FP_TYPE));
    s1=s2=s3=0.0;
    for (i=0;i<pn;i++) {
        scanf("%d %Lf %Lf %Lf ", &tmpint, &t1, &t2, &t3);
        px[i]=t1;
        s1+=t1;
        py[i]=t2;
        s2+=t2;
        pz[i]=t3;
        s3+=t3;
        for (j=0;j<k;j++)
            scanf("%d %Lf %Lf %Lf ", &tmpint, &t1, &t2, &t3);
    }
    s1/=pn;
    s2/=pn;
    s3/=pn;
    for (i=0;i<pn;i++) {
        px[i]-=s1;
        py[i]-=s2;
        pz[i]-=s3;
    }

    /* system initialization */
    niter=99;
    epsilon=EPSILON*1000;
    m=9;
    n=3*pn;
    c=pn;
    a=malloc(c*m*sizeof(FP_TYPE));
    b=malloc(c*n*sizeof(FP_TYPE));
    p=malloc(n*sizeof(FP_TYPE));
    w=malloc(c*sizeof(FP_TYPE));
    x=malloc(m*sizeof(FP_TYPE));
    dx=malloc(m*sizeof(FP_TYPE));
    u=malloc(n*sizeof(FP_TYPE));
    vx=malloc(m*m*sizeof(FP_TYPE));

    /* p(nxn) matrix :-) */
    for (i=0;i<n;i++)
        p[i]=1.0;

    /* x(m) initial vector */
    for (i=0;i<m;i++)
        dx[i]=x[i]=0.0;

    /* a(cxm) matrix */
    for (i=0;i<c;i++) {
        a[i*m+0]=mySQR(px[i]);
        a[i*m+1]=mySQR(py[i]);
        a[i*m+2]=mySQR(pz[i]);
        a[i*m+3]=px[i]*py[i];
        a[i*m+4]=py[i]*pz[i];
        a[i*m+5]=px[i]*pz[i];
        a[i*m+6]=px[i];
        a[i*m+7]=py[i];
        a[i*m+8]=pz[i];
    }

    for (iter=0;iter<niter;iter++) {
        /* b(cxn) matrix */
        for (i=0;i<c*n;i++) {
            b[i]=0.0;
        }
        for (i=0;i<c;i++) {
            b[ i*n+i     ] = 2*x[0]*px[i]+x[3]*py[i]+x[5]*pz[i]+x[6];
            b[ i*n+i+c   ] = 2*x[1]*py[i]+x[3]*px[i]+x[4]*pz[i]+x[7];
            b[ i*n+i+2*c ] = 2*x[2]*pz[i]+x[4]*py[i]+x[5]*px[i]+x[8];
        }

        /* w(c) vector */
        for (i=0;i<c;i++)
            w[i]=1.0-x[0]*mySQR(px[i])-x[1]*mySQR(py[i])-x[2]*mySQR(pz[i])-x[3]*px[i]*py[i]-x[4]*py[i]*pz[i]-x[5]*px[i]*pz[i]-x[6]*px[i]-x[7]*py[i]-x[8]*pz[i];

        /*
        printf("\n");
        print_mat("a",a,c,m);
        print_mat("b",b,c,n);
        print_mat("p",p,1,n);
        print_mat("w",w,1,c);
        printf("\n");
        */

        sigma=xlsqr(a,b,p,w,dx,u,vx,m,n,c);

        /*
        printf("\n");
        print_mat("x",x,1,m);
        printf("\nSigma = ");
        printf(FP_FORMAT sigma);
        printf("\n");
        print_mat("u",u,1,n);
        print_mat("vx",vx,m,m);
        printf("\n");
        */

        t=-1.0;
        for (i=0;i<m;i++) {
            if (t<myABS(dx[i]))
                t=myABS(dx[i]);
            x[i]+=dx[i];
        }
        printf("Max |dX| = %Le at iteration %d.\n",t,iter+1);
        if (t<=epsilon)
            break;
    }

    printf("\n\nOUTPUT:\n\nIterations = %d\nMaximum |dX| = %Le",iter+1,t);
    print_mat("x",x,m,1);
//  print_mat("U",u,1,n);
    printf("\nSigma =\n");
    printf(FP_FORMAT sigma);
    printf("\n");
    print_mat("Vx",vx,m,m);

    printf("\nxlsqr_demo v1.0.0 Ver.: OK (c) 04/10 - Christos Iosifides\n");
    gpl();
    return 0;
}

FP_TYPE xlsqr(FP_TYPE *a, FP_TYPE *b, FP_TYPE *p, FP_TYPE *w, FP_TYPE *x, FP_TYPE *u, FP_TYPE *vx, int m, int n, int c) {
    FP_TYPE Sigma, *Pa, *Na, *Na_inv;
    FP_TYPE *tmp1, *tmp2, s, tmp;
    register int i, j, k;

    tmp1=malloc(c*n*sizeof(FP_TYPE));
    tmp2=malloc(c*n*sizeof(FP_TYPE));
    /* * * Pa = (B*P'*Bt)' * * */
    Pa=malloc(c*c*sizeof(FP_TYPE));
    /* tmp1 = B*P' */
    for (i=0;i<c;i++) {
        for (j=0;j<n;j++) {
            tmp1[i*n+j]=b[i*n+j]/p[j];
        }
    }
    /* tmp2 = tmp1*Bt */
    for (i=0;i<c;i++) {
        for (j=0;j<c;j++) {
            tmp=0.0;
            for (k=0;k<n;k++) {
                tmp+=tmp1[i*n+k]*b[j*n+k];
            }
            tmp2[i*c+j]=tmp;
        }
    }
    /* Pa = tmp2' */
    Matrix_inversion(tmp2,Pa,&s,c);
    /* * * Na = At*Pa*A * * */
    Na=tmp2;
    /* tmp1 = At*Pa */
    for (i=0;i<m;i++) {
        for (j=0;j<c;j++) {
            tmp1[i*c+j]=0.0;
            for (k=0;k<c;k++) {
                tmp1[i*c+j]+=a[k*m+i]*Pa[k*c+j];
            }
        }
    }
    /* Na = tmp1*A */
    for (i=0;i<m;i++) {
        for (j=0;j<m;j++) {
            Na[i*m+j]=0.0;
            for (k=0;k<c;k++) {
                Na[i*m+j]+=tmp1[i*c+k]*a[k*m+j];
            }
        }
    }
    /* * * Na_inv = Na' * * */
    Na_inv=vx;
    Matrix_inversion(Na,Na_inv,&s,m);

    /* * * x = Na_inv*At*Pa*w * * */
    /* tmp1 = Na_inv*At */
    for (i=0;i<m;i++) {
        for (j=0;j<c;j++) {
            tmp1[i*c+j]=0.0;
            for (k=0;k<m;k++) {
                tmp1[i*c+j]+=Na_inv[i*m+k]*a[j*m+k];
            }
        }
    }
    /* tmp2 = tmp1*Pa */
    for (i=0;i<m;i++) {
        for (j=0;j<c;j++) {
            tmp2[i*c+j]=0.0;
            for (k=0;k<c;k++) {
                tmp2[i*c+j]+=tmp1[i*c+k]*Pa[k*c+j];
            }
        }
    }
    /* x = tmp2*w */
    for (i=0;i<m;i++) {
        x[i]=0.0;
        for (k=0;k<c;k++) {
            x[i]+=tmp2[i*c+k]*w[k];
        }
    }

    /* * * u = P'*Bt*Pa*(w-A*x) * * */
    /* tmp1 = P'*Bt */
    for (i=0;i<n;i++) {
        for (j=0;j<c;j++) {
            tmp1[i*c+j]=b[j*n+i]/p[i];
        }
    }
    /* tmp2 = tmp1*Pa */
    for (i=0;i<n;i++) {
        for (j=0;j<c;j++) {
            tmp=0.0;
            for (k=0;k<c;k++) {
                tmp+=tmp1[i*c+k]*Pa[k*c+j];
            }
            tmp2[i*c+j]=tmp;
        }
    }
    free(Pa);
    /* tmp1 = w-A*x */
    for (i=0;i<c;i++) {
        tmp1[i]=w[i];
        for (j=0;j<m;j++) {
            tmp1[i]-=a[i*m+j]*x[j];
        }
    }
    /* u = tmp2*tmp1 */
    for (i=0;i<n;i++) {
        u[i]=0.0;
        for (j=0;j<c;j++) {
            u[i]+=tmp2[i*c+j]*tmp1[j];
        }
    }
    free(tmp2);

    /* * * so^2 = ut*P*u * * */
    /* tmp1 = ut*P */
    for (i=0;i<n;i++) {
        tmp1[i]=u[i]*p[i];
    }
    /* s = tmp1*u */
    s=0.0;
    for (i=0;i<n;i++) {
        s+=tmp1[i]*u[i];
    }
    Sigma=s/(c-m);
    free(tmp1);

    /* * * Vx=so^2*Na_inv * * */
    for (i=0;i<m;i++) {
        for (j=0;j<m;j++) {
            vx[i*m+j]*=Sigma;
        }
    }
    return mySQRT(Sigma);
}

int Matrix_inversion(FP_TYPE *ata,FP_TYPE *ata_inv,FP_TYPE *det,int n) {
    FP_TYPE *tmpa, *b;
    int *l;
    int r_value;
    register int i, j, k;

    tmpa=malloc(n*n*sizeof(FP_TYPE));
    b=malloc(n*sizeof(FP_TYPE));
    l=malloc(n*sizeof(int));
    r_value=1;
    for (i=0; i<n; i++) {
        memcpy(tmpa,ata,n*n*sizeof(FP_TYPE));
        for (j=0; j<n; j++) {
            b[j]=0.0;
        }
        b[i]=1.0;
//      r_value*=Gaussian_elimination(tmpa,b,det,l,n);
        r_value*=Cholesky_decomposition(tmpa, b, n);
        for (j=0; j<n; j++)
            ata_inv[j*n+i]=b[j];
    }
    return r_value;
}

int Gaussian_elimination(FP_TYPE *a, FP_TYPE *b, FP_TYPE *det, int *l, int n) {
    FP_TYPE s, d;
    int n_1, m;
    register int i, j, k;

    d=1.0;
    n_1=n-1;
    for (k=0; k<n_1; k++) {
        m=k;
        for (i=k+1; i<n; i++)
            if (myABS(a[i*n+k])>myABS(a[m*n+k]))
                m=i;
        l[k]=m;
        d*=a[m*n+k];
        if (myABS(a[m*n+k])<EPSILON)
            return 0;
        if (m!=k) {
            d=-d;
            for (j=k; j<n; j++) {
                s=a[m*n+j];
                a[m*n+j]=a[k*n+j];
                a[k*n+j]=s;
            }
        }
        for (i=k+1; i<n; i++) {
            s=a[i*n+k]/a[k*n+k];
            a[i*n+k]=s;
            for (j=k+1; j<n; j++)
                a[i*n+j]-=s*a[k*n+j];
        }
    }
    d*=a[n_1*n+n_1];
    if (myABS(d)<EPSILON)
        return 0;
    *det=d;
    for (k=0; k<n_1; k++) {
        m=l[k];
        s=b[m];
        b[m]=b[k];
        b[k]=s;
        for (i=k+1; i<n; i++)
            b[i]-=s*a[i*n+k];
    }
    b[n_1]/=a[n_1*n+n_1];
    for (k=n-2; k>=0; k--) {
        s=0.;
        for (j=k+1; j<n; j++)
            s+=a[k*n+j]*b[j];
        b[k]=(b[k]-s)/a[k*n+k];
    }
    return 1;
}

int Cholesky_decomposition(FP_TYPE *a, FP_TYPE *b, int n) {
    FP_TYPE s;
    register int i, j, k;

    for (i=0; i<n; i++) {
        s=0.0;
        if (i)
            for (j=0; j<i; j++)
                s+=mySQR(a[i*n+j]);
        a[i*n+i]=mySQRT(a[i*n+i]-s);
        if (myABS(a[i*n+i])<EPSILON)
            return 0;
        if (i<n-1)
            for (j=i+1; j<n; j++) {
                s=0.0;
                if (i)
                    for (k=0; k<i; k++)
                        s+=a[i*n+k]*a[j*n+k];
                a[j*n+i]=(a[j*n+i]-s)/a[i*n+i];
            }
    }
    b[0]/=a[0*n+0];
    for (k=1; k<n; k++) {
        s=0.0;
        for (j=0; j<k; j++)
            s+=a[k*n+j]*b[j];
        b[k]=(b[k]-s)/a[k*n+k];
    }
    b[n-1]/=a[(n-1)*n+n-1];
    for (i=0; i<n-1; i++) {
        s=0.0;
        for (j=n-i-1; j<n; j++)
            s+=a[j*n+n-i-2]*b[j];
        b[n-i-2]=(b[n-i-2]-s)/a[(n-i-2)*n+n-i-2];
    }
    return 1;
}

void gpl(void) {
    puts("\nCopyright (C) Apr 2010 Ch Iossif <chiossif@yahoo.com>");

    puts("\nThis program is free software: you can redistribute it and/or modify");
    puts("it under the terms of the GNU General Public License as published by");
    puts("the Free Software Foundation, either version 3 of the License, or");
    puts("(at your option) any later version.");

    puts("\nThis program is distributed in the hope that it will be useful,");
    puts("but WITHOUT ANY WARRANTY; without even the implied warranty of");
    puts("MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the");
    puts("GNU General Public License for more details.");

    puts("\nYou should have received a copy of the GNU General Public License");
    puts("along with this program.  If not, see <http://www.gnu.org/licenses/>.\n");
}

FP_TYPE mySQRT(FP_TYPE x) {
    FP_TYPE px,a;

    a=px=x;
    x/=2.0;
    while (myABS(x-px)>EPSILON) {
        px=x;
        x=(px+a/px)/2;
    }
    return x;
}

void print_mat(const char *s, FP_TYPE *a, int m, int n) {
    register i,j;

    printf("\n%s(%dx%d) = \n",s,m,n);
    for (i=0;i<m;i++) {
        for (j=0;j<n;j++)
            printf(FP_FORMAT a[i*n+j]);
        printf("\n");
    }
    return;
}

