#
# DRSEx. 7 - Remote Sensing Lab. - SRSE NTUA
#
# Calculate Confusion Matrix and error / statistics
#
# Reference:
#	
#            https://www.gdal.org/gdal_tutorial.html
#            https://www.qgistutorials.com/en/docs/sampling_raster_data.html
#            https://plugins.qgis.org/plugins/pointsamplingtool
#            https://www.harrisgeospatial.com/docs/CalculatingConfusionMatrices.html
#
# License: 
#            https://www.gnu.org/licenses/gpl.txt
#
from osgeo import gdal
from math import sqrt
import struct
import sys
import numpy as np

### Enter your classified image full path filename here
class_filename="Cropped_A_k-means_12_clusters.tif"
### Enter your truth image full path filename here
truth_filename="Cropped_A_k-means_8_clusters.tif"
### Enter your confusion matrix full path filename here
conf_filename="Cropped_A_k-means_8_12.csv"

class_dataset = gdal.Open(class_filename, gdal.GA_ReadOnly)
if not class_dataset:
    print("Unable to open ",class_filename," file")
    sys.exit(1)

print("Classified Image :")
print("Driver: {}/{}".format(class_dataset.GetDriver().ShortName,
                             class_dataset.GetDriver().LongName))
print("Size is {} x {} x {} = {}".format(class_dataset.RasterXSize,
                                    class_dataset.RasterYSize,
                                    class_dataset.RasterCount,
                                    class_dataset.RasterXSize * class_dataset.RasterYSize * class_dataset.RasterCount))
print("Projection is {}".format(class_dataset.GetProjection()))
geotransform = class_dataset.GetGeoTransform()
if geotransform:
    print("Origin = ({}, {})".format(geotransform[0], geotransform[3]))
    print("Pixel Size = ({}, {})".format(geotransform[1], geotransform[5]))
class_bands = class_dataset.RasterCount
class_rows  = class_dataset.RasterYSize
class_cols  = class_dataset.RasterXSize

bands = class_bands
for bandno in range(1,bands+1):
    print("Band={}".format(bandno))
    band = class_dataset.GetRasterBand(bandno)
    print("Band Type={}".format(gdal.GetDataTypeName(band.DataType)))

    min = band.GetMinimum()
    max = band.GetMaximum()
    if not min or not max:
        (min,max) = band.ComputeRasterMinMax(True)
    print("Min={:.3f}, Max={:.3f}".format(min,max))
      
    if band.GetOverviewCount() > 0:
        print("Band has {} overviews".format(band.GetOverviewCount()))
      
    if band.GetRasterColorTable():
        print("Band has a color table with {} entries".format(band.GetRasterColorTable().GetCount()))
# My minimum / maximum calculation - Accurate
    myMin =  65536
    myMax = -65537
    myNum = 0
    for y in range(0,class_rows):
        scanline = band.ReadRaster(xoff=0, yoff=y, xsize=band.XSize, ysize=1, buf_xsize=band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32)
        tuple_of_floats = struct.unpack('f' * band.XSize, scanline)
        for x in tuple_of_floats:
            myNum  += 1
            if x > myMax:
                myMax = x
            if x < myMin:
                myMin = x
    print("Min={:.3f}, Max={:.3f}, Samples={}".format(myMin,myMax,myNum))
conf_cols = int(myMax)+1
if class_bands!=1:
    print("More that one band in ",class_filename," image")
    sys.exit(1)

### Enter your truth image full path filename here
#truth_filename="Cropped_A_k-means_8_clusters.tif"

truth_dataset = gdal.Open(truth_filename, gdal.GA_ReadOnly)
if not truth_dataset:
    print("Unable to open ",truth_filename," file")
    sys.exit(1)

print("Truth Image :")
print("Driver: {}/{}".format(truth_dataset.GetDriver().ShortName,
                             truth_dataset.GetDriver().LongName))
print("Size is {} x {} x {} = {}".format(truth_dataset.RasterXSize,
                                    truth_dataset.RasterYSize,
                                    truth_dataset.RasterCount,
                                    truth_dataset.RasterXSize * truth_dataset.RasterYSize * truth_dataset.RasterCount))
print("Projection is {}".format(class_dataset.GetProjection()))
geotransform = truth_dataset.GetGeoTransform()
if geotransform:
    print("Origin = ({}, {})".format(geotransform[0], geotransform[3]))
    print("Pixel Size = ({}, {})".format(geotransform[1], geotransform[5]))

truth_bands = truth_dataset.RasterCount
truth_rows = truth_dataset.RasterYSize
truth_cols = truth_dataset.RasterXSize
bands = truth_bands
for bandno in range(1,truth_bands+1):
    print("Band={}".format(bandno))
    band = truth_dataset.GetRasterBand(bandno)
    print("Band Type={}".format(gdal.GetDataTypeName(band.DataType)))

    min = band.GetMinimum()
    max = band.GetMaximum()
    if not min or not max:
        (min,max) = band.ComputeRasterMinMax(True)
    print("Min={:.3f}, Max={:.3f}".format(min,max))
      
    if band.GetOverviewCount() > 0:
        print("Band has {} overviews".format(band.GetOverviewCount()))
      
    if band.GetRasterColorTable():
        print("Band has a color table with {} entries".format(band.GetRasterColorTable().GetCount()))
# My minimum / maximum calculation - Accurate
    myMin =  65536
    myMax = -65537
    myNum = 0
    for y in range(0,truth_rows):
        scanline = band.ReadRaster(xoff=0, yoff=y, xsize=band.XSize, ysize=1, buf_xsize=band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32)
        tuple_of_floats = struct.unpack('f' * band.XSize, scanline)
        for x in tuple_of_floats:
            myNum  += 1
            if x > myMax:
                myMax = x
            if x < myMin:
                myMin = x
    print("Min={:.3f}, Max={:.3f}, Samples={}".format(myMin,myMax,myNum))
conf_rows = int(myMax)+1
if truth_bands!=1:
    print("More that one band in ",truth_filename," image")
    sys.exit(1)

#Size check
if truth_rows!=class_rows or class_cols!=truth_cols:
    print("Images ",truth_filename," and ",class_filename,"have different sizes")
    sys.exit(1)

# Calculate confusion matrix
conf = np.zeros( (conf_rows, conf_cols), dtype=int )

truth_band = truth_dataset.GetRasterBand(1)
class_band = class_dataset.GetRasterBand(1)
for y in range(0,truth_rows):
    truth_scanline = truth_band.ReadRaster(xoff=0, yoff=y, xsize=truth_band.XSize, ysize=1, buf_xsize=truth_band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32)
    class_scanline = class_band.ReadRaster(xoff=0, yoff=y, xsize=class_band.XSize, ysize=1, buf_xsize=class_band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32)
    truth_tuple_of_floats = struct.unpack('f' * truth_band.XSize, truth_scanline)
    class_tuple_of_floats = struct.unpack('f' * class_band.XSize, class_scanline)
    for i in range(truth_cols):
        t = int(truth_tuple_of_floats[i])
        c = int(class_tuple_of_floats[i])
        conf[t,c] += 1

print("Confusion matrix :",conf.shape)
print(conf)
np.savetxt(conf_filename, conf, delimiter=";")

# Sums of rows and columns
conf_row_sum = np.sum(conf, axis = 1)
conf_col_sum = np.sum(conf, axis = 0)
print(conf_row_sum)
print(conf_col_sum)
    
# The overall accuracy is calculated by summing the number of correctly classified values and dividing by the total number of values. The correctly classified values are located along the upper-left to lower-right diagonal of the confusion matrix. The total number of values is the number of values in either the truth or predicted-value arrays.
nom = 0
den = 0
conf_row_sum = np.sum(conf, axis = 1)
conf_col_sum = np.sum(conf, axis = 0)
for r in range(conf_rows):
    for c in range(conf_cols):
        if r==c:
            nom += conf[r,c]
        den += conf[r,c]
overall = 100.0 * nom / den
print("Overall accuracy {:.4f}%".format(overall))

# The kappa coefficient measures the agreement between classification and truth values. A kappa value of 1 represents perfect agreement, while a value of 0 represents no agreement. 
N = den
mii = nom
CiGi = 0
for i in range(conf_rows):
    CiGi += conf_row_sum[i] * conf_col_sum[i]
kappa = (N * mii - CiGi) / (N*N - CiGi)
print("kappa coefficient {:.6f}".format(kappa))

# Errors of commission represent the fraction of values that were predicted to be in a class but do not belong to that class. They are a measure of false positives. Errors of commission are shown in the rows of the confusion matrix, except for the values along the diagonal.
commission =  np.zeros(conf_rows, dtype=float)
for r in range(conf_rows):
    if conf_row_sum[r]>0 and r<conf_cols:
        commission[r] = float(conf_row_sum[r] - conf[r,r]) / conf_row_sum[r]
    else:
        commission[r] = -1.0
print("Commission errors :", commission)

# Errors of omission represent the fraction of values that belong to a class but were predicted to be in a different class. They are a measure of false negatives. Errors of omission are shown in the columns of the confusion matrix, except for the values along the main diagonal.
omission =  np.zeros(conf_cols, dtype=float)
print(conf_cols,conf_col_sum.shape,commission.shape)
for c in range(conf_cols):
    if conf_col_sum[c]>0 and c<conf_rows:
        omission[c] = float(conf_col_sum[c] - conf[c,c]) / conf_col_sum[c]
    else: 
        omission[c] = -1.0
print("Ommission errors :", omission)

# Producer accuracy is the probability that a value in a given class was classified correctly.
producer = np.zeros(conf_cols, dtype=float)
for c in range(conf_cols):
    if conf_col_sum[c]>0 and c<conf_rows:
        producer[c] = 100.0 * conf[c,c] / conf_col_sum[c]
    else:    
        producer[c] = -99.0
print("Producer accuracy % :", producer)

# User accuracy is the probability that a value predicted to be in a certain class really is that class.
user =  np.zeros(conf_rows, dtype=float)
for r in range(conf_rows):
    if conf_row_sum[r]>0 and r<conf_cols:
        user[r] = 100.0 * conf[r,r] / conf_row_sum[r]
    else:
        user[r] = -99.0
print("User accuracy % :", user)

print("""
# Run inside QGIS at Plugins->Open python console >>> prompt
import os # make os functions avaliable here
os.chdir('C:\DRS\Ex7') # change to working folder
exec(open('ConfusionMatrix.py').read()) # run your code Python3 QGIS 3.x
""")