# # DRSEx. 7 - Remote Sensing Lab. - SRSE NTUA # # Calculate Confusion Matrix and error / statistics # # Reference: # # https://www.gdal.org/gdal_tutorial.html # https://www.qgistutorials.com/en/docs/sampling_raster_data.html # https://plugins.qgis.org/plugins/pointsamplingtool # https://www.harrisgeospatial.com/docs/CalculatingConfusionMatrices.html # # License: # https://www.gnu.org/licenses/gpl.txt # from osgeo import gdal from math import sqrt import struct import sys import numpy as np ### Enter your classified image full path filename here class_filename="Cropped_A_k-means_12_clusters.tif" ### Enter your truth image full path filename here truth_filename="Cropped_A_k-means_8_clusters.tif" ### Enter your confusion matrix full path filename here conf_filename="Cropped_A_k-means_8_12.csv" class_dataset = gdal.Open(class_filename, gdal.GA_ReadOnly) if not class_dataset: print("Unable to open ",class_filename," file") sys.exit(1) print("Classified Image :") print("Driver: {}/{}".format(class_dataset.GetDriver().ShortName, class_dataset.GetDriver().LongName)) print("Size is {} x {} x {} = {}".format(class_dataset.RasterXSize, class_dataset.RasterYSize, class_dataset.RasterCount, class_dataset.RasterXSize * class_dataset.RasterYSize * class_dataset.RasterCount)) print("Projection is {}".format(class_dataset.GetProjection())) geotransform = class_dataset.GetGeoTransform() if geotransform: print("Origin = ({}, {})".format(geotransform[0], geotransform[3])) print("Pixel Size = ({}, {})".format(geotransform[1], geotransform[5])) class_bands = class_dataset.RasterCount class_rows = class_dataset.RasterYSize class_cols = class_dataset.RasterXSize bands = class_bands for bandno in range(1,bands+1): print("Band={}".format(bandno)) band = class_dataset.GetRasterBand(bandno) print("Band Type={}".format(gdal.GetDataTypeName(band.DataType))) min = band.GetMinimum() max = band.GetMaximum() if not min or not max: (min,max) = band.ComputeRasterMinMax(True) print("Min={:.3f}, Max={:.3f}".format(min,max)) if band.GetOverviewCount() > 0: print("Band has {} overviews".format(band.GetOverviewCount())) if band.GetRasterColorTable(): print("Band has a color table with {} entries".format(band.GetRasterColorTable().GetCount())) # My minimum / maximum calculation - Accurate myMin = 65536 myMax = -65537 myNum = 0 for y in range(0,class_rows): scanline = band.ReadRaster(xoff=0, yoff=y, xsize=band.XSize, ysize=1, buf_xsize=band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32) tuple_of_floats = struct.unpack('f' * band.XSize, scanline) for x in tuple_of_floats: myNum += 1 if x > myMax: myMax = x if x < myMin: myMin = x print("Min={:.3f}, Max={:.3f}, Samples={}".format(myMin,myMax,myNum)) conf_cols = int(myMax)+1 if class_bands!=1: print("More that one band in ",class_filename," image") sys.exit(1) ### Enter your truth image full path filename here #truth_filename="Cropped_A_k-means_8_clusters.tif" truth_dataset = gdal.Open(truth_filename, gdal.GA_ReadOnly) if not truth_dataset: print("Unable to open ",truth_filename," file") sys.exit(1) print("Truth Image :") print("Driver: {}/{}".format(truth_dataset.GetDriver().ShortName, truth_dataset.GetDriver().LongName)) print("Size is {} x {} x {} = {}".format(truth_dataset.RasterXSize, truth_dataset.RasterYSize, truth_dataset.RasterCount, truth_dataset.RasterXSize * truth_dataset.RasterYSize * truth_dataset.RasterCount)) print("Projection is {}".format(class_dataset.GetProjection())) geotransform = truth_dataset.GetGeoTransform() if geotransform: print("Origin = ({}, {})".format(geotransform[0], geotransform[3])) print("Pixel Size = ({}, {})".format(geotransform[1], geotransform[5])) truth_bands = truth_dataset.RasterCount truth_rows = truth_dataset.RasterYSize truth_cols = truth_dataset.RasterXSize bands = truth_bands for bandno in range(1,truth_bands+1): print("Band={}".format(bandno)) band = truth_dataset.GetRasterBand(bandno) print("Band Type={}".format(gdal.GetDataTypeName(band.DataType))) min = band.GetMinimum() max = band.GetMaximum() if not min or not max: (min,max) = band.ComputeRasterMinMax(True) print("Min={:.3f}, Max={:.3f}".format(min,max)) if band.GetOverviewCount() > 0: print("Band has {} overviews".format(band.GetOverviewCount())) if band.GetRasterColorTable(): print("Band has a color table with {} entries".format(band.GetRasterColorTable().GetCount())) # My minimum / maximum calculation - Accurate myMin = 65536 myMax = -65537 myNum = 0 for y in range(0,truth_rows): scanline = band.ReadRaster(xoff=0, yoff=y, xsize=band.XSize, ysize=1, buf_xsize=band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32) tuple_of_floats = struct.unpack('f' * band.XSize, scanline) for x in tuple_of_floats: myNum += 1 if x > myMax: myMax = x if x < myMin: myMin = x print("Min={:.3f}, Max={:.3f}, Samples={}".format(myMin,myMax,myNum)) conf_rows = int(myMax)+1 if truth_bands!=1: print("More that one band in ",truth_filename," image") sys.exit(1) #Size check if truth_rows!=class_rows or class_cols!=truth_cols: print("Images ",truth_filename," and ",class_filename,"have different sizes") sys.exit(1) # Calculate confusion matrix conf = np.zeros( (conf_rows, conf_cols), dtype=int ) truth_band = truth_dataset.GetRasterBand(1) class_band = class_dataset.GetRasterBand(1) for y in range(0,truth_rows): truth_scanline = truth_band.ReadRaster(xoff=0, yoff=y, xsize=truth_band.XSize, ysize=1, buf_xsize=truth_band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32) class_scanline = class_band.ReadRaster(xoff=0, yoff=y, xsize=class_band.XSize, ysize=1, buf_xsize=class_band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32) truth_tuple_of_floats = struct.unpack('f' * truth_band.XSize, truth_scanline) class_tuple_of_floats = struct.unpack('f' * class_band.XSize, class_scanline) for i in range(truth_cols): t = int(truth_tuple_of_floats[i]) c = int(class_tuple_of_floats[i]) conf[t,c] += 1 print("Confusion matrix :",conf.shape) print(conf) np.savetxt(conf_filename, conf, delimiter=";") # Sums of rows and columns conf_row_sum = np.sum(conf, axis = 1) conf_col_sum = np.sum(conf, axis = 0) print(conf_row_sum) print(conf_col_sum) # The overall accuracy is calculated by summing the number of correctly classified values and dividing by the total number of values. The correctly classified values are located along the upper-left to lower-right diagonal of the confusion matrix. The total number of values is the number of values in either the truth or predicted-value arrays. nom = 0 den = 0 conf_row_sum = np.sum(conf, axis = 1) conf_col_sum = np.sum(conf, axis = 0) for r in range(conf_rows): for c in range(conf_cols): if r==c: nom += conf[r,c] den += conf[r,c] overall = 100.0 * nom / den print("Overall accuracy {:.4f}%".format(overall)) # The kappa coefficient measures the agreement between classification and truth values. A kappa value of 1 represents perfect agreement, while a value of 0 represents no agreement. N = den mii = nom CiGi = 0 for i in range(conf_rows): CiGi += conf_row_sum[i] * conf_col_sum[i] kappa = (N * mii - CiGi) / (N*N - CiGi) print("kappa coefficient {:.6f}".format(kappa)) # Errors of commission represent the fraction of values that were predicted to be in a class but do not belong to that class. They are a measure of false positives. Errors of commission are shown in the rows of the confusion matrix, except for the values along the diagonal. commission = np.zeros(conf_rows, dtype=float) for r in range(conf_rows): if conf_row_sum[r]>0 and r0 and c0 and c0 and rOpen python console >>> prompt import os # make os functions avaliable here os.chdir('C:\DRS\Ex7') # change to working folder exec(open('ConfusionMatrix.py').read()) # run your code Python3 QGIS 3.x """)