Land cover classification using a Naives Bayes algorithm with Python – Tutorial

Hydrate

Machine learning can be applied to many fields as land cover classification from remote sensing imagery. The performance and accuracy of classification will depend on the number of raster bands, the image resolution, the land cover type and the algorithm used. This tutorial will perform an applied case of land cover classification from a panchromatic image in Python using the Naives Bayes algorithm implemented on the Scikit Learn package. The classification will cover four categories as: rivers, river beaches, woods and pastures; coding is performed under a Jupyter Notebook with Python running from a geospatial Conda environment. Some graphics and statistics about the classification precision are also included on the tutorial.

IMPORTANT: You will need a conda environment with geospatial tools for the tutorial. Create the environment following this link: hatarilabs.com/ih-en/how-to-install-python-geopandas-in-windows-on-a-conda-environment-tutorial

Tutorial

Code

Part 1 Data processing

import rasterio
from rasterio.plot import show
classRst =  rasterio.open('../Data/Class/LandCoverClassified_v2.tif')
show(classRst)

classBand = classRst.read(1)
classBand[:5,:5]
array([[40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.],
       [40., 40., 40., 40., 40.]], dtype=float32)
nrow, ncol = classBand.shape
print(nrow,ncol)
42 56
# get a sample pixel coordinates 
for row in range(nrow)[:3]:
    for col in range(ncol)[:3]:
        print(classRst.xy(row,col))
(-73.163875, -3.916002123)
(-73.16362500000001, -3.916002123)
(-73.163375, -3.916002123)
(-73.163875, -3.916252123)
(-73.16362500000001, -3.916252123)
(-73.163375, -3.916252123)
(-73.163875, -3.9165021230000003)
(-73.16362500000001, -3.9165021230000003)
(-73.163375, -3.9165021230000003)
# get pixel value for a give coordinate
imageRst =  rasterio.open('../Data/Rst/riverImage2.tif')
imageBand = imageRst.read(1)
row, col = imageRst.index(-73.16275, -3.91712)
bandValue = imageBand[row,col]
print(bandValue)
15
# open list of image bands
redBand = imageRst.read(1)
greenBand = imageRst.read(2)
blueBand = imageRst.read(3)
bandList = [redBand, greenBand, blueBand]
#define a function that returns a band value
def rasterValue(imageRst,imageBand,x,y):
    row, col = imageRst.index(x,y)
    bandValue = imageBand[row,col]
    return(bandValue)
rasterValue(imageRst,blueBand,-73.16275, -3.91712)
45
#create complex function for all rows, cols and bands
import sys
classList = []

# get a sample pixel coordinates 
for row in range(nrow):
    for col in range(ncol):
        sys.stdout.write('r'+'Working with row, col: %d %d'%(row,col))
        partialLine = []
        x,y = classRst.xy(row,col)
        for band in bandList:
            value = rasterValue(imageRst,band,x,y)
            partialLine.append(value)
        partialLine.append(classBand[row,col])
        classList.append(partialLine)
Working with row, col: 41 55
import numpy as np
classArray = np.array(classList)
classArray[:5]
array([[15., 56., 40., 40.],
       [10., 51., 35., 40.],
       [13., 63., 42., 40.],
       [13., 65., 42., 40.],
       [18., 70., 47., 40.]], dtype=float32)
np.save('../Data/Txt/classArray',classArray)

Part 2 Naives Bayes

import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
classArray = np.load('../Data/Txt/classArray.npy')
classArray[:5]
array([[15., 56., 40., 40.],
       [10., 51., 35., 40.],
       [13., 63., 42., 40.],
       [13., 65., 42., 40.],
       [18., 70., 47., 40.]], dtype=float32)
X = classArray[:,:-1]
X[:2]
array([[15., 56., 40.],
       [10., 51., 35.]], dtype=float32)
y = classArray[:,-1]
y[:2]
array([40., 40.], dtype=float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
clf = GaussianNB()
clf.fit(X_train, y_train)
GaussianNB()
y_predict = clf.predict(X_test)
import matplotlib.pyplot as plt
fig= plt.figure(figsize=(12,6))
plt.scatter(y_test,y_predict,alpha=0.01,s=100)
<matplotlib.collections.PathCollection at 0x2b37a32db20>

from sklearn.metrics import confusion_matrix
confMatrix = confusion_matrix(y_test,y_predict)
confMatrix
array([[238,   0,  12,   1],
       [ 10, 111,   3,   0],
       [  2,   6,  45,   8],
       [  0,   0,  39, 302]], dtype=int64)
#!pip install seaborn
import seaborn as sns; sns.set_theme()
fig= plt.figure(figsize=(12,6))
ax = sns.heatmap(confMatrix)

# generation of output raster
import rasterio 

res = 0.0005
x0,y0 = -73.2870, -3.9635
xLL, yLL = x0 - res/2, y0 - res/2
nrow, ncol = 60, 100
#definition of the raster transform array
from rasterio.transform import Affine
transform = Affine.translation(xLL, yLL)*Affine.scale(res,res)
transform
Affine(0.0005, 0.0, -73.28725,
       0.0, 0.0005, -3.9637499999999997)
#get crs as wkt
from rasterio.crs import CRS
rasterCrs = CRS.from_epsg(4326)
rasterCrs.data
{'init': 'epsg:4326'}
imageRst =  rasterio.open('../Data/Rst/riverImage2.tif')

# open list of image bands
redBand = imageRst.read(1)
greenBand = imageRst.read(2)
blueBand = imageRst.read(3)
bandList = [redBand, greenBand, blueBand]
import rasterio

# empty array
classArray = np.zeros([nrow,ncol])

### From Part 1



#define a function that returns a band value
def rasterValue(imageRst,imageBand,x,y):
    row, col = imageRst.index(x,y)
    bandValue = imageBand[row,col]
    return(bandValue)

#create complex function for all rows, cols and bands
import sys
classList = []

# get a sample pixel coordinates 
for row in range(nrow):
    for col in range(ncol):
        sys.stdout.write('r'+'Working with row, col: %d %d'%(row,col))
        partialLine = []
        #x,y = classRst.xy(row,col)
        x = x0 + col*res #<--new
        y = y0 + row*res #<--new
        for band in bandList:
            value = rasterValue(imageRst,band,x,y)
            partialLine.append(value)
        y_predict = clf.predict([np.array(partialLine)]) #<--new
        classArray[row,col] = y_predict #<--new
        #partialLine.append(classBand[row,col])
        #classList.append(partialLine)
Working with row, col: 59 99
#show class array
classArray
array([[40., 40., 40., ..., 40., 40., 40.],
       [40., 40., 40., ..., 40., 40., 40.],
       [40., 40., 40., ..., 40., 40., 40.],
       ...,
       [10., 30., 40., ..., 40., 40., 40.],
       [30., 10., 30., ..., 40., 40., 40.],
       [40., 40., 30., ..., 40., 40., 40.]])
#plot array
plt.imshow(classArray)
<matplotlib.image.AxesImage at 0x2b37df03400>

#definition, register and close of interpolated raster
interpRaster = rasterio.open('../Data/Class/predictedLandCover.tif',
                                'w',
                                driver='GTiff',
                                height=nrow,
                                width=ncol,
                                count=1,
                                dtype=np.float32,
                                crs=rasterCrs,
                                transform=transform,
                                )
interpRaster.write(classArray,1)
interpRaster.close()

Input data

You can download the input data from this link.

—————
TRANSCOM ISPFree Sigma HSE Email
Level 6 Domain Names are FREE – Register Now.

Related Posts

Hydrate

EGU2024

The mountain hydrology team will have full presence at EGU2024. Below you’ll find a list of the primary presentations by our team.

Full programme can be

Boost Inflight Internet- Business Hosting- Secure Email Account- Dropcatch Domain Names- Antisnoop Email- Free Secure Email- Cheap VOIP Calls- Free Hosting and Email- Aero Connectivity- Premium Domains for Sale- Transcom Telecom- Satphone Airtime- Mobile Plans- Free Domain Names- Organic Products- Aviation News
Transcom ISP - Transcom VOIP - Free Secure Email - Dropcatch Software - FastApn Inflight - Aero Connect - Premium Domains