/* lsqr v1.0.0 Ver.: OK (c) 01/10 - Christos Iosifides */
/*
   This program solves the least squares solution x that minimizes norm |A*x-B|

   Name:			lsqr
   Version:		    1.0.0
   Copyright:	    Ch Iossif @ 2010
   Author:		    Christos Iosifidis
   Date:			20/01/10
   Modified:		05/02/10 11:38
   Description:     Solves the least squares solution x that minimizes norm |A*x-B|
   Compile:
            Linux:     gcc lsqr.c -o lsqr -lm
            Windows:   gcc lsqr.c -o lsqr.exe -lm or similar with any ANSI C compiler

  INPUT:

  M =  4 N =  2

  A =

  0.588 -0.809
 -0.768 -0.64
 -0.404  0.915
  0.898  0.441

  B =

 -0.05  0.07  0.06 -0.08

  OUTPUT:

  X =

 -0.09428331  0.00918387017

  sigma =

  0.0134242658

  Vx =

  0.00704930526 -0.000141895141
 -0.000141895141  0.00640821409

 Copyright (C) Dec 2009 Ch Iossif <chiossif@yahoo.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define myABS(x) (((x)<0)?-(x):(x))
#define mySQR(x) ((x)*(x))

//#define FP_TYPE double
//#define EPSILON 1.e-15
//#define FP_FORMAT "%16.10lf",

#define FP_TYPE long double
#define EPSILON 1.e-19
#define FP_FORMAT "%24.15Lf",

//#define FP_TYPE __float128
//#define EPSILON 1.e-34
//#define FP_FORMAT "%40.34Lf",(long double)

FP_TYPE mySQRT(FP_TYPE);
void gpl(void);
int Matrix_inversion(FP_TYPE *,FP_TYPE *,FP_TYPE *,int);
int Gaussian_elimination(FP_TYPE *,FP_TYPE *,FP_TYPE *,int *,int);
int Cholesky_decomposition(FP_TYPE *, FP_TYPE *, int);

int main(void) {
    FP_TYPE *a, *b, *x, s, *ata, *atb, *ata_inv;
    long double t;
    int m, n, *l;
    register int i, j, k;

    scanf("%d %d",&m,&n);
    a=malloc(m*n*sizeof(FP_TYPE));
    b=malloc(m*sizeof(FP_TYPE));
    x=malloc(n*sizeof(FP_TYPE));
    l=malloc(n*sizeof(int));
    ata=malloc(n*n*sizeof(FP_TYPE));
    atb=malloc(m*sizeof(FP_TYPE));
    ata_inv=malloc(n*n*sizeof(FP_TYPE));

    for (i=0; i<m; i++)
        for (j=0; j<n; j++) {
            scanf("%Lf",&t);
            a[i*n+j]=t;
        }
    for (i=0; i<m; i++) {
        scanf("%Lf",&t);
        b[i]=t;
    }

    printf("\nlsqr solves the least squares solution x\n");
    printf("that minimizes norm |A*x-B|\n");
    printf("\nINPUT:\n");
    printf("\nM = %d N = %d\n",m,n);
    printf("\nA =\n");
    for (i=0; i<m; i++) {
        for (j=0; j<n; j++)
            printf(FP_FORMAT a[i*n+j]);
        printf("\n");
    }
    printf("\nB =\n");
    for (i=0; i<m; i++) {
        printf(FP_FORMAT b[i]);
        printf("\n");
    }

    for (i=0; i<n; i++)
        for (j=0; j<n; j++) {
            ata[i*n+j]=0.0;
            for (k=0; k<m; k++) {
                ata[i*n+j]+=a[k*n+i]*a[k*n+j];
            }
        }
    for (i=0; i<n; i++) {
        atb[i]=0.0;
        for (j=0; j<m; j++)
            atb[i]+=a[j*n+i]*b[j];
    }
    printf("\nCholesky decomposition - Input:\n\nAt.A | At.B :\n");
    for (i=0; i<n; i++) {
        for (j=0; j<n; j++)
            printf(FP_FORMAT ata[i*n+j]);
        printf("  | ");
        printf(FP_FORMAT atb[i]);
        printf("\n");
    }
    Cholesky_decomposition(ata,atb,n);
    for (i=0; i<n; i++) {
        x[i]=atb[i];
    }
    printf("\nCholesky decomposition - Output:\n\n X :\n");
    for (i=0; i<n; i++) {
        printf(FP_FORMAT x[i]);
    }
    printf("\n");
    for (i=0; i<m; i++) {
        atb[i]=-b[i];
        for (j=0; j<n; j++)
            atb[i]+=a[i*n+j]*x[j];
    }
    s=0.0;
    for (i=0; i<m; i++)
        s+=atb[i]*atb[i];
    s=mySQRT(s/(m-n));
    printf("\nSigma =");
    printf(FP_FORMAT s);
    printf("\n");

    for (i=0; i<n; i++)
        for (j=0; j<n; j++) {
            ata[i*n+j]=0.0;
            for (k=0; k<m; k++) {
                ata[i*n+j]+=a[k*n+i]*a[k*n+j];
            }
        }
    for (i=0; i<n; i++) {
        atb[i]=0.0;
        for (j=0; j<m; j++)
            atb[i]+=a[j*n+i]*b[j];
    }
    printf("\nGaussian_elimination - Input:\n\nAt.A | At.B :\n");
    for (i=0; i<n; i++) {
        for (j=0; j<n; j++)
            printf(FP_FORMAT ata[i*n+j]);
        printf("  | ");
        printf(FP_FORMAT atb[i]);
        printf("\n");
    }
    Gaussian_elimination(ata,atb,&s,l,n);
    for (i=0; i<n; i++) {
        x[i]=atb[i];
    }
    printf("\nGaussian_elimination - Output:\n\n X :\n");
    for (i=0; i<n; i++) {
        printf(FP_FORMAT x[i]);
    }
    printf("\n");
    for (i=0; i<m; i++) {
        atb[i]=-b[i];
        for (j=0; j<n; j++)
            atb[i]+=a[i*n+j]*x[j];
    }
    s=0.0;
    for (i=0; i<m; i++)
        s+=atb[i]*atb[i];
    s=mySQRT(s/(m-n));
    printf("\nSigma =");
    printf(FP_FORMAT s);
    printf("\n");

    for (i=0; i<n; i++)
        for (j=0; j<n; j++) {
            ata[i*n+j]=0.0;
            for (k=0; k<m; k++) {
                ata[i*n+j]+=a[k*n+i]*a[k*n+j];
            }
        }
    for (i=0; i<n; i++) {
        atb[i]=0.0;
        for (j=0; j<m; j++)
            atb[i]+=a[j*n+i]*b[j];
    }
    printf("\nMatrix_inversion - Input:\n\nAt.A | At.B :\n");
    for (i=0; i<n; i++) {
        for (j=0; j<n; j++)
            printf(FP_FORMAT ata[i*n+j]);
        printf("  | ");
        printf(FP_FORMAT atb[i]);
        printf("\n");
    }
    Matrix_inversion(ata,ata_inv,&s,n);
    for (i=0; i<n; i++) {
        x[i]=0.0;
        for (j=0; j<n; j++)
            x[i]+=ata_inv[i*n+j]*atb[j];
    }
    printf("\nMatrix_inversion - Output:\n\n X :\n");
    for (i=0; i<n; i++) {
        printf(FP_FORMAT x[i]);
    }
    printf("\n");

    for (i=0; i<m; i++) {
        atb[i]=-b[i];
        for (j=0; j<n; j++)
            atb[i]+=a[i*n+j]*x[j];
    }
    s=0.0;
    for (i=0; i<m; i++)
        s+=atb[i]*atb[i];
    s=mySQRT(s/(m-n));

    printf("\nOUTPUT:\n");
    printf("\nX =\n");
    for (i=0; i<n; i++) {
        printf(FP_FORMAT x[i]);
        printf("\n");
    }
    printf("\nSigma =");
    printf(FP_FORMAT s);
    printf("\n");
    printf("\nVx =\n");
    for (i=0; i<n; i++) {
        for (j=0; j<n; j++)
            printf(FP_FORMAT (s*ata_inv[i*n+j]));
        printf("\n");
    }

    printf("\n lsqr v1.0.0 Ver.: OK (c) 01/10 - Christos Iosifides\n");
    gpl();
    return 0;
}

int Matrix_inversion(FP_TYPE *ata,FP_TYPE *ata_inv,FP_TYPE *det,int n) {
    FP_TYPE *tmpa, *b;
    int *l;
    int r_value;
    register int i, j, k;

    tmpa=malloc(n*n*sizeof(FP_TYPE));
    b=malloc(n*sizeof(FP_TYPE));
    l=malloc(n*sizeof(int));
    r_value=1;
    for (i=0; i<n; i++) {
        for (j=0; j<n; j++) {
            for (k=0; k<n; k++)
                tmpa[j*n+k]=ata[j*n+k];
            b[j]=0.0;
        }
        b[i]=1.0;
        r_value*=Gaussian_elimination(tmpa,b,det,l,n);
//        r_value*=Cholesky_decomposition(tmpa, b, n);
        for (j=0; j<n; j++)
            ata_inv[j*n+i]=b[j];
    }
    return r_value;
}

int Gaussian_elimination(FP_TYPE *a, FP_TYPE *b, FP_TYPE *det, int *l, int n) {
    FP_TYPE s, d;
    int n_1, m;
    register int i, j, k;

    d=1.0;
    n_1=n-1;
    for (k=0; k<n_1; k++) {
        m=k;
        for (i=k+1; i<n; i++)
            if (myABS(a[i*n+k])>myABS(a[m*n+k]))
                m=i;
        l[k]=m;
        d*=a[m*n+k];
        if (myABS(a[m*n+k])<EPSILON)
            return 0;
        if (m!=k) {
            d=-d;
            for (j=k; j<n; j++) {
                s=a[m*n+j];
                a[m*n+j]=a[k*n+j];
                a[k*n+j]=s;
            }
        }
        for (i=k+1; i<n; i++) {
            s=a[i*n+k]/a[k*n+k];
            a[i*n+k]=s;
            for (j=k+1; j<n; j++)
                a[i*n+j]-=s*a[k*n+j];
        }
    }
    d*=a[n_1*n+n_1];
    if (myABS(d)<EPSILON)
        return 0;
    *det=d;
    for (k=0; k<n_1; k++) {
        m=l[k];
        s=b[m];
        b[m]=b[k];
        b[k]=s;
        for (i=k+1; i<n; i++)
            b[i]-=s*a[i*n+k];
    }
    b[n_1]/=a[n_1*n+n_1];
    for (k=n-2; k>=0; k--) {
        s=0.;
        for (j=k+1; j<n; j++)
            s+=a[k*n+j]*b[j];
        b[k]=(b[k]-s)/a[k*n+k];
    }
    return 1;
}

int Cholesky_decomposition(FP_TYPE *a, FP_TYPE *b, int n) {
    FP_TYPE s;
    register int i, j, k;

    for (i=0; i<n; i++) {
        s=0.0;
        if (i)
            for (j=0; j<i; j++)
                s+=mySQR(a[i*n+j]);
        a[i*n+i]=mySQRT(a[i*n+i]-s);
        if (myABS(a[i*n+i])<EPSILON)
            return 0;
        if (i<n-1)
            for (j=i+1; j<n; j++) {
                s=0.0;
                if (i)
                    for (k=0; k<i; k++)
                        s+=a[i*n+k]*a[j*n+k];
                a[j*n+i]=(a[j*n+i]-s)/a[i*n+i];
            }
    }
    b[0]/=a[0*n+0];
    for (k=1; k<n; k++) {
        s=0.0;
        for (j=0; j<k; j++)
            s+=a[k*n+j]*b[j];
        b[k]=(b[k]-s)/a[k*n+k];
    }
    b[n-1]/=a[(n-1)*n+n-1];
    for (i=0; i<n-1; i++) {
        s=0.0;
        for (j=n-i-1; j<n; j++)
            s+=a[j*n+n-i-2]*b[j];
        b[n-i-2]=(b[n-i-2]-s)/a[(n-i-2)*n+n-i-2];
    }
    return 1;
}

void gpl(void) {
    puts("\nCopyright (C) Dec 2009 Ch Iossif <chiossif@yahoo.com>");

    puts("\nThis program is free software: you can redistribute it and/or modify");
    puts("it under the terms of the GNU General Public License as published by");
    puts("the Free Software Foundation, either version 3 of the License, or");
    puts("(at your option) any later version.");

    puts("\nThis program is distributed in the hope that it will be useful,");
    puts("but WITHOUT ANY WARRANTY; without even the implied warranty of");
    puts("MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the");
    puts("GNU General Public License for more details.");

    puts("\nYou should have received a copy of the GNU General Public License");
    puts("along with this program.  If not, see <http://www.gnu.org/licenses/>.\n");
}

FP_TYPE mySQRT(FP_TYPE x) {
    FP_TYPE px,a;

    a=px=x;
    x/=2.0;
    while (myABS(x-px)>EPSILON) {
        px=x;
        x=(px+a/px)/2;
    }
    return x;
}

