#Import packages
import rasterio
from rasterio.mask import mask
from rasterio.warp import Resampling, reproject, calculate_default_transform
from rasterio.plot import show
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.wkt import loads
import os
from pyproj import Proj, transform, CRS
from shapely.geometry import MultiPolygon
Part 2
Data Preparation - Intersect Global Forest Watch - Loss Year data with Carbon Offset Projects Geospatial Database
In this tutorial, we will intersect the VRT generated at the end of the previous tutorial with the geodataframe to clip the VRT to each project’s individual raster and save this as a .tif file. Following this, we will compute the area for each loss year within these rasters and integrate this information into our geodataframe.
Import the packages below
Loading the projects geodatabase (accounting area as geometry)
Filtering for AD and IFM Projects
We will conduct this analysis exclusively for Avoided Deforestation and Improved Forest Management projects.
Filtering for Polygons only
This database includes project boundaries in polygon format, while projects with unavailable boundaries are represented by points. To calculate the area of forest loss within project boundaries, we will use only those projects with polygon boundaries and exclude those represented solely by point locations.
# Load the carbon-offset project accounting areas
# (one geometry per project: polygons where boundaries exist, points otherwise)
all_gdfs = gpd.read_file('path/to/your/file/project_accounting_areas.gpkg')
all_gdfs

# Filtering for AD and IFM projects only
ad_ifm_gdf = all_gdfs[(all_gdfs['Project Type'] == "AD") | (all_gdfs["Project Type"] == "IFM")]
ad_ifm_gdf

# Filtering for projects whose boundaries are available:
# keep only (Multi)Polygon geometries — point-only projects cannot be
# used for the area calculations that follow
ad_ifm_gdf_poly = ad_ifm_gdf[ad_ifm_gdf.geom_type.isin(['Polygon', 'MultiPolygon'])]
ad_ifm_gdf_poly
Clipping the VRT with the filtered data frame
After filtering the dataframe, we will intersect it with the VRT to clip the VRT to each individual project raster and save these rasters as .tif files.
The following function iterates through each row of the dataframe, extracts the project geometry, clips the VRT using this geometry, and then saves the clipped raster as a .tif file, naming it according to the project ID and placing it in the specified output directory.
#Clip vrt with the projects database
def clip_vrt_with_geometries(vrt_path, gdf, output_dir):
    """Clip a VRT to each project geometry and save one GeoTIFF per project.

    Parameters
    ----------
    vrt_path : str
        Path to the VRT mosaic to clip.
    gdf : geopandas.GeoDataFrame
        Must contain a 'geometry' column and a 'ProjectID' column; the
        ProjectID is used as the output file name.
    output_dir : str
        Directory where the clipped ``<ProjectID>.tif`` files are written.
        The directory is assumed to exist.
    """
    # Open the VRT once and reuse the handle for every geometry
    with rasterio.open(vrt_path) as src:
        # Iterate over each geometry in the GeoDataFrame
        for idx, row in gdf.iterrows():
            geometry = [row['geometry']]   # mask() expects an iterable of geometries
            project_id = row['ProjectID']  # used as output filename

            # Clip the VRT using the geometry; crop=True trims the raster
            # to the geometry's bounding box
            out_image, out_transform = mask(src, geometry, crop=True)
            out_meta = src.meta.copy()

            # Update metadata to match the clipped extent and GTiff output
            out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[1],
                             "width": out_image.shape[2],
                             "transform": out_transform
                             })

            # Save the clipped raster as <ProjectID>.tif
            output_path = os.path.join(output_dir, f"{project_id}.tif")
            with rasterio.open(output_path, "w", **out_meta) as dest:
                dest.write(out_image)

            print(f"Saved clipped raster to {output_path}")
Loading the GFW VRT and clipping it to individual project rasters
# Loading the vrt and geospatial projects database
vrt_file = 'path/to/your/file/output.vrt'

# Ensure the GeoDataFrame has a 'ProjectID' column with unique IDs for each geometry
# NOTE(review): the original checked one dataframe ('ad_ifm_gdf') and clipped
# another ('all_gdfs'); both now use the filtered polygon-only dataframe, which
# is what the later area-calculation loop reads the .tif files for.
if 'ProjectID' not in ad_ifm_gdf_poly.columns:
    raise ValueError("GeoDataFrame must have a 'ProjectID' column.")

# Output directory for the clipped rasters
output_dir = 'path/to/your/directory'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Clip the VRT with the geometries in the GeoDataFrame
clip_vrt_with_geometries(vrt_file, ad_ifm_gdf_poly, output_dir)
Calculate the area for each loss year for each project
To accomplish this, we first determine the UTM CRS for each project. Next, we reproject the clipped raster for each project to its respective UTM zone. Finally, we calculate the area for each loss year category within the project’s boundary.
Determine the UTM CRS for each project
The following functions calculate the UTM CRS based on a project’s latitude and longitude and then add a new column, ‘utm_crs’, to the dataframe.
# Function to determine UTM zone for a given geometry
def get_utm_zone(lon, lat):
    """Return the WGS84/UTM CRS containing the point (lon, lat).

    Parameters
    ----------
    lon, lat : float
        Longitude and latitude in decimal degrees (WGS84).

    Returns
    -------
    pyproj.CRS
        EPSG:326xx (northern hemisphere) or EPSG:327xx (southern hemisphere),
        where xx is the two-digit UTM zone number.
    """
    # UTM zone number: longitude shifted by +180, 6 degrees per zone,
    # mod 60 keeps the result in 1..60
    utm_band = str((int((lon + 180) / 6) % 60) + 1)

    # Ensure the UTM band is two digits (e.g. '01' instead of '1'),
    # as required by the EPSG code format
    if len(utm_band) == 1:
        utm_band = '0' + utm_band

    # EPSG code starts with 326 for the northern hemisphere
    # and 327 for the southern hemisphere
    if lat >= 0:
        epsg_code = f'326{utm_band}'  # Northern Hemisphere
    else:
        epsg_code = f'327{utm_band}'  # Southern Hemisphere

    # Return the CRS object with the appropriate EPSG code
    return CRS(f'EPSG:{epsg_code}')
def get_lat_long_utm(gdf):
    """Add a 'utm_crs' column to *gdf* based on each geometry's centroid.

    Mutates and returns the same GeoDataFrame. Temporary 'latitude' and
    'longitude' columns are dropped; the 'centroid' column is kept
    (matching the original behavior — drop it manually if unneeded).
    """
    # Get centroids of each project geometry
    gdf['centroid'] = gdf.geometry.centroid

    # Extract latitude / longitude from the centroid
    gdf['latitude'] = gdf['centroid'].y
    gdf['longitude'] = gdf['centroid'].x

    # Calculate the UTM CRS for each row
    gdf['utm_crs'] = gdf.apply(lambda row: get_utm_zone(row['longitude'], row['latitude']), axis=1)

    # Drop the helper latitude/longitude columns now that utm_crs is set
    gdf.drop(columns=['longitude', 'latitude'], inplace=True)

    return gdf
# Applying get_lat_long_utm to compute the UTM CRS for each project
ad_ifm_gdf_poly = get_lat_long_utm(ad_ifm_gdf_poly)
ad_ifm_gdf_poly
Reproject the clipped raster and compute the area for each loss year category
First, we define a function to reproject the raster for each project using the UTM zone specified in the utm_crs column.
Next, we create a function to process the raster for each project, which calculates the area of each forest-loss-year category within the project’s geometry. This function masks the raster with the project’s geometry, reprojects it to the project’s UTM CRS, and computes the area for each category by counting pixel occurrences. It then organizes the results, including project details and calculated areas, into a dictionary format for each project and returns a list of these dictionaries.
def reproject_to_utm(src, geometry, utm_zone):
    """Reproject an open raster to the given UTM CRS.

    Parameters
    ----------
    src : rasterio.DatasetReader
        Open source raster.
    geometry : list
        Unused here; kept for interface compatibility with the caller.
    utm_zone : pyproj.CRS
        Target UTM CRS (as stored in the 'utm_crs' column).

    Returns
    -------
    (numpy.ndarray, dict)
        The reprojected pixel array of shape (bands, height, width) and the
        updated metadata kwargs (including the new 'transform').
    """
    dst_crs = utm_zone.to_string()

    # Compute the target transform/size for the full source extent
    # (named dst_transform to avoid shadowing pyproj's imported `transform`)
    dst_transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds)

    kwargs = src.meta.copy()
    kwargs.update({'crs': dst_crs,
                   'transform': dst_transform,
                   'width': width,
                   'height': height
                   })

    # Allocate the output array and reproject band by band;
    # nearest-neighbour resampling preserves the categorical loss-year codes
    destination = np.empty((src.count, height, width), dtype=src.dtypes[0])
    for i in range(1, src.count + 1):
        reproject(
            source=rasterio.band(src, i),
            destination=destination[i - 1],
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=dst_transform,
            dst_crs=dst_crs,
            resampling=Resampling.nearest
        )
    return destination, kwargs
def calculate_area(tif_file, gdf_row):
    """Compute the area (km²) of each loss-year category inside a project.

    Masks the project raster with its geometry, reprojects to the project's
    UTM CRS, counts pixels per category, and converts counts to km² using
    the UTM pixel size.

    Parameters
    ----------
    tif_file : str
        Path to the project's clipped GeoTIFF.
    gdf_row : pandas.Series
        Row of the projects GeoDataFrame; must provide 'geometry', 'utm_crs'
        and the project metadata columns copied into the result.

    Returns
    -------
    list[dict]
        A single-element list with one result dictionary for this project.
    """
    results = []

    with rasterio.open(tif_file) as src:
        geometry = [gdf_row['geometry']]
        # nodata=-1 marks pixels outside the geometry so they do not
        # collide with valid category codes (0..23)
        out_image, out_transform = mask(src, geometry, crop=True, nodata=-1)
        out_image = out_image[0]
        # out_image[out_image == -1] = np.nan

        # Reproject to the project's UTM zone so pixel size is in metres
        utm_zone = gdf_row['utm_crs']
        reprojected_image, kwargs = reproject_to_utm(src, geometry, utm_zone)

        # Count pixels per category code
        unique, counts = np.unique(reprojected_image, return_counts=True)
        total_area = dict(zip(unique, counts))

        # Pixel width in metres (affine 'a' coefficient); area in km²
        pixel_size = kwargs['transform'][0]
        area_per_class = {cls: cnt * pixel_size**2 / 1e6 for cls, cnt in total_area.items()}

        result_row = {
            'ProjectID': gdf_row['ProjectID'],
            'Project Name': gdf_row['Project Name'],
            'Registry Name': gdf_row['Registry Name'],
            'Methodology': gdf_row['Methodology'],
            'Country': gdf_row['Country'],
            'Project Type': gdf_row['Project Type'],
            'Project Start Date': gdf_row['Project Start Date'],
            'Geometry Type': gdf_row['geometry'].geom_type,
            'geometry': gdf_row['geometry'],
            'Raster File': os.path.basename(tif_file)
        }

        # Category 0 = no loss; categories 1..23 map to loss years 2001..2023
        for cls in range(24):
            column_name = 'No loss' if cls == 0 else f'{2000 + cls}(km2)'
            result_row[column_name] = area_per_class.get(cls, 0)

        results.append(result_row)
    return results
Applying the calculate_area function to project geometry and associated raster to get area of forest loss for each year between 2001 and 2023
tif_folder = 'path/to/your/file/vrt_clipped'

# Initialize an empty list to store all results
all_results = []

# Iterate over each row in the GeoDataFrame
for idx, row in ad_ifm_gdf_poly.iterrows():
    # Find the corresponding TIFF file in the folder
    tif_file = os.path.join(tif_folder, f"{row['ProjectID']}.tif")

    # Check if the TIFF file exists
    if os.path.exists(tif_file):
        # Calculate area for the current TIFF file and row
        result_rows = calculate_area(tif_file, row)

        # Extend results list with the calculated rows
        all_results.extend(result_rows)
        print(f'Successfully processed {row["ProjectID"]}.tif')
    else:
        print(f'TIFF file {row["ProjectID"]}.tif not found in folder.')

# Convert the list of dictionaries to a GeoDataFrame
result_gdf = gpd.GeoDataFrame(all_results)
result_gdf.head()

# Save the combined results to a new GeoPackage or CSV file
result_gdf.to_file('path/to/your/file/ad_ifm_forest_loss_area.gpkg', driver='GPKG')
result_gdf.drop(columns='geometry').to_csv('path/to/your/file/ad_ifm_forest_loss_area.csv', index=False)