#Import required packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
Part 3
Data Analysis & Visualization
Determing the vulnerability of forest carbon projects to wildfires
Now that the data is clean and in a useable format, we’ll analyse it and make visualizations to detrmine the trends in vulnerability of projects to wildfire
#Imprting the dataframe created at the end of Part 2
= gpd.read_file('path/to/your/geopackage/ad_ifm_fc_test.gpkg') ad_ifm_fc
Number of projects by fire risk category and project type
We will analyze the number of projects in each fire risk category by project type, and then create a table to present the data
To determine the number of projects in each fire risk category, group the dataframe by the Project Type and fire_risk_type columns, and then count the number of observations within each combination of these categories.
Next, pivot the table such that we set index on the fire_risk_type and set the Project Type as columns. Finally, add a Total column to sum the number of projects across all project types for each fire risk category
# Group projects by 'Project Type' and 'Fire Risk Type' and count the number of projects in each group
= ad_ifm_fc.groupby(['Project Type', 'fire_risk_type']).size().reset_index(name='Number of Projects')
risk_counts
# Create a pivot table with 'Fire Risk Type' as the index and 'Project Type' as columns, summing the number of projects
= risk_counts.pivot_table(index='fire_risk_type', columns='Project Type', values='Number of Projects', aggfunc='sum')
risk_count_pivot
# Add a 'Total' column that sums the number of projects across all project types for each fire risk type
'Total'] = risk_count_pivot.sum(axis=1)
risk_count_pivot[
=0, inplace=True)
risk_count_pivot.reset_index(level
# Print the resulting pivot table
risk_count_pivot
Currently, the data is sorted alphabetically based on the values in the fire_risk_type column.
To sort the data in a more meaningful way, convert the fire_risk_type column to a categorical type with a specific order.
'fire_risk_type'] = pd.Categorical(risk_count_pivot['fire_risk_type'], ["High", "Medium", "Low", "Non fire-prone"])
risk_count_pivot[
#Sorting the data by the fire_risk_type_column
= risk_count_pivot.sort_values('fire_risk_type')
risk_count_pivot
risk_count_pivot
This table is informative but somewhat basic, let’s enhance its presentation.
#Import package
from great_tables import GT, md, html
= (
gt_fire_class # Create a GT table object from the pivoted risk count DataFrame
GT(risk_count_pivot)
.tab_header(=md('**Distribution of Avoided Deforestation (AD) and Improved Forest Management (IFM) Projects by Fire Risk Category**')
title# Set the table header with a bold title describing the distribution of AD and IFM projects by fire risk category
)=md("**Project Type**"), columns=["AD", "IFM"]) # Create a column spanner labeled "Project Type" for the AD and IFM columns
.tab_spanner(label
.cols_label(# Define labels for the columns with bold text formatting
=md("**Fire Risk Category**"), # Label for the fire_risk_type column
fire_risk_type=md("**Num AD**"), # Label for the AD column
AD=md("**Num IFM**"), # Label for the IFM column
IFM=md("**Total Projects**") # Label for the Total column
Total
)
.cols_align(="center", columns=["AD", "IFM", "Total"] # Center-align the text in the AD, IFM, and Total columns
align
)
)
# Render the formatted table gt_fire_class
Percentage of projects in each fire risk category
In this section, we will visualize the distribution of projects across different fire risk categories by creating a pie chart.
We will start by counting the frequency of each fire risk type and then calculate the corresponding percentages for each category.
# Count frequencies of Fire Risk types
= ad_ifm_fc['fire_risk_type'].value_counts()
fire_risk_type_counts
# Calculate percentages
= (fire_risk_type_counts / fire_risk_type_counts.sum()) * 100
fire_risk_type_percentages
fire_risk_type_percentages
Next, use the percentages from the fire_risk_type_percentages dataframe to create a pie chart for visualization.
#Labels
= ['High', 'Non Fire-prone', 'Low', 'Medium']
labels
# Plotting the pie chart
=(8, 8)) # Adjust the figure size if needed
plt.figure(figsize= ['#da1e28', '#64dfdf', '#ffd7d9', '#ff8389']
colors =labels, autopct='%1.1f%%', colors=colors, startangle=140, textprops={'fontsize': 14})
plt.pie(fire_risk_type_percentages, labels
# Adding title
'Percentage of AD and IFM Projects by Fire Risk Category', fontsize=16, loc='left', pad=20)
plt.title(
# Show plot
'equal') # Equal aspect ratio ensures the pie chart is circular.
plt.axis('fire_risk_high_res.png', dpi=300)
plt.savefig(
plt.show()
Finding the area under each risk category by ecosystem type
We start by calculating the area of each project.
To ensure accurate area calculations, we need to reproject each project geometry to its respective UTM zone. For this, we will create two functions:
Determine the UTM Zone: This function calculates the UTM zone for each project based on its latitude and longitude.
Calculate and Convert Area: This function extracts latitude and longitude from the centroids, applies the UTM zone determination, computes the area (defaulting to square meters), and then converts the area to square kilometers.
#Import packages
import geopandas as gpd
from pyproj import Proj, transform, CRS
# Function to determine UTM zone for a given geometry
def get_utm_zone(lon, lat):
= str((int((lon + 180) / 6) % 60) + 1)
utm_band if len(utm_band) == 1:
= '0' + utm_band
utm_band if lat >= 0:
= f'326{utm_band}'
epsg_code else:
= f'327{utm_band}'
epsg_code return CRS(f'EPSG:{epsg_code}')
#Convert the geomtery to desired utm and compute area in km2
def convert_to_utm_and_compute_area(row):
= row.geometry
geom = geom.centroid.coords[0]
lon, lat = get_utm_zone(lon, lat)
utm_crs # Create a GeoDataFrame for the single geometry to use the to_crs method
= gpd.GeoDataFrame(index=[0], crs='EPSG:4326', geometry=[geom])
geom_gdf = geom_gdf.to_crs(utm_crs)
geom_utm = geom_utm.area[0]/1e6
area_km2 return area_km2
#Applying the convert_to_utm_and_compute_area function
= 'EPSG:4326' # Ensure the CRS is set to EPSG:4326
ad_ifm_fc.crs 'area_km2'] = ad_ifm_fc.apply(convert_to_utm_and_compute_area, axis=1) #Creating a new column, 'area_km2'. ad_ifm_fc[
Since our dataframe includes both point and polygon geometries, the function will only compute the area for polygons, leaving ‘NA’ for points.
We will first filter out the ‘NA’ values from the area_km2 column and convert the remaining values to float.
Next, we’ll group the data by fire_risk_type and ecosystem_type, and sum the areas within each group.
ad_ifm_fc
#Filter the NA values from the the area_km2 column
= ad_ifm_fc[ad_ifm_fc['area_km2'] != 'NA']
ad_ifm_fc_clean
#Convert the values in the area_km2 column to float
'area_km2'] = ad_ifm_fc_clean['area_km2'].astype(float)
ad_ifm_fc_clean[
# Group by Fire Risk type and Ecosystem type, then sum the areas
= ad_ifm_fc_clean.groupby(['fire_risk_type', 'ecosystem_type'])['area_km2'].sum().reset_index()
area_by_risk_and_ecosystem
#area_by_risk_and_ecosystem = area_by_risk_and_ecosystem.iloc[1:].reset_index(drop=True)
area_by_risk_and_ecosystem
Since we do not want to include the “Non fire-prone” category in this graph, we will remove the last row from the dataframe.
#Removing the last row
= area_by_risk_and_ecosystem.iloc[:-1 , :] area_by_risk_and_ecosystem
As we did when creating the previous table, convert the fire_risk_type column to a categorical type with a specified order for the categories.
'fire_risk_type'] = pd.Categorical(area_by_risk_and_ecosystem['fire_risk_type'], ["Low", "Medium", "High"]) area_by_risk_and_ecosystem[
Finally, we create a grouped horizontal bar graph with fire_risk_type and ecosystem_type grouped on the y-axis. The x-axis represents the area under each category.
# Define custom colors for each Fire Risk type
= {'High': '#da1e28', 'Medium': '#ff8389', 'Low': '#ffd7d9'}
colors #Labels
= ['Low', 'Medium', 'High']
labels
# Plotting
= plt.subplots(figsize=(10, 6))
fig, ax ='ecosystem_type', columns='fire_risk_type', values='area_km2').plot(kind='barh', stacked=False,
area_by_risk_and_ecosystem.pivot(index=0.7, ax=ax, logx=True, color=colors)
width
# Annotate each bar with its area value
for p in ax.patches:
= p.get_width()
width = p.get_height()
height = p.get_xy()
x, y f'{int(width)}', (x + width * 1.05, y + height / 2), va='center', ha='left')
ax.annotate(
# Show plot
=False, loc='upper left', bbox_to_anchor=(0.001, 1), ncol=3, labels=labels)
plt.legend(frameon# Remove the x-axis ticks and labels
='x', which='both', bottom=False, top=False, labelbottom=False)
ax.tick_params(axis# Increase y-axis tick label font size
='y', labelsize=14)
ax.tick_params(axis
# Customize plot
'')
ax.set_ylabel('Area ($Km^2$)', fontsize=14)
ax.set_xlabel('Area of Projects Under Fire Risk Categories by Ecosystem Type', fontsize=16, loc='left', pad=20)
ax.set_title('top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines[
# Adjust layout and show plot
plt.tight_layout()
'high_resolution_plot.png', dpi=300)
plt.savefig(
plt.show()