import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Filter warnings
from warnings import filterwarnings
filterwarnings('ignore')


# Read the World Risk Index records from the CSV file (path is relative to
# the working directory)
world_risk_index = pd.read_csv(filepath_or_buffer="world_risk_index.csv")

# Preview the leading five rows (shown as the notebook cell output)
world_risk_index.head(n=5)


# Count the missing entries in every column of the raw dataset
missing_values = world_risk_index.isna().sum(axis=0)

# Bare expression: shown as the notebook cell output
missing_values

Region                          0
WRI                             0
Exposure                        0
Vulnerability                   0
Susceptibility                  0
Lack of Coping Capabilities     0
 Lack of Adaptive Capacities    1
Year                            0
Exposure Category               0
WRI Category                    1
Vulnerability Category          4
Susceptibility Category         0
dtype: int64


# Keep only the rows that have a value in every column
world_risk_index_cleaned = world_risk_index.dropna(axis=0, how="any")

# Record the (rows, columns) dimensions of the cleaned frame
cleaned_data_shape = world_risk_index_cleaned.shape

# Bare expression: shown as the notebook cell output
cleaned_data_shape

(1912, 12)


# Count rows of the raw dataset that exactly repeat an earlier row
duplicate_rows = world_risk_index.duplicated(keep="first").sum()

# Bare expression: shown as the notebook cell output
duplicate_rows

0


# Global view: line plot of WRI against Year over the whole cleaned dataset,
# drawn without a confidence-interval band (ci=None).
# NOTE(review): newer seaborn releases may deprecate `ci` in favour of
# `errorbar`; the blanket warning filter at the top of the file would hide
# that warning - confirm against the installed seaborn version.
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(x="Year", y="WRI", data=world_risk_index_cleaned, ci=None, ax=ax)
ax.set_title("Trend of World Risk Index Over Time (Global)", fontweight='bold')
ax.set_xlabel("Year", fontweight='bold')
ax.set_ylabel("World Risk Index", fontweight='bold')
plt.show()


# Most recent assessment year present in the cleaned data
latest_year = world_risk_index_cleaned["Year"].max()

# Restrict the cleaned data to that year only
latest_year_data_global = world_risk_index_cleaned.loc[
    world_risk_index_cleaned["Year"].eq(latest_year)
]

# Order by Vulnerability (highest first) and keep the first ten rows
top_vulnerable_regions = latest_year_data_global.sort_values(
    by="Vulnerability", ascending=False
).iloc[:10]

# Horizontal bar chart: one bar per region, bar length = Vulnerability score
fig, ax = plt.subplots(figsize=(14, 7))
sns.barplot(
    data=top_vulnerable_regions,
    x="Vulnerability",
    y="Region",
    palette="Reds_d",
    ax=ax,
)
ax.set_title(f"Top 10 Most Vulnerable Regions in {latest_year}", fontweight='bold')
ax.set_xlabel("Vulnerability Score", fontweight='bold')
ax.set_ylabel("Region", fontweight='bold')
plt.show()


# Scatter of Exposure against Lack of Coping Capabilities for the latest year,
# overlaid with a red regression line and no confidence band (ci=None)
fig, ax = plt.subplots(figsize=(14, 7))
sns.regplot(
    data=latest_year_data_global,
    x="Exposure",
    y="Lack of Coping Capabilities",
    scatter_kws={"s": 100, "edgecolor": "w"},
    line_kws={"color": "red"},
    ci=None,
    ax=ax,
)
ax.set_title(f"Exposure vs. Lack of Coping Capabilities in {latest_year}", fontweight='bold')
ax.set_xlabel("Exposure Score", fontweight='bold')
ax.set_ylabel("Lack of Coping Capabilities Score", fontweight='bold')
plt.show()


# Tally how many latest-year rows fall into each WRI category
wri_category_counts_global = latest_year_data_global["WRI Category"].value_counts()

# Bar chart of the tallies, one viridis colour per category
fig, ax = plt.subplots(figsize=(14, 7))
wri_category_counts_global.plot.bar(
    ax=ax,
    color=sns.color_palette("viridis", n_colors=len(wri_category_counts_global)),
)
ax.set_title(
    f"Distribution of Regions by World Risk Index (WRI) Category in {latest_year} (Global)",
    fontweight='bold',
)
ax.set_xlabel("WRI Category", fontweight='bold')
ax.set_ylabel("Number of Regions", fontweight='bold')
# Tilt the category labels by 45 degrees
plt.xticks(rotation=45)
plt.show()


# Countries treated as "North America" for this analysis.
# NOTE(review): some Region values in this dataset appear in German
# (e.g. "Philippinen", "Salomonen"); confirm these English names match the
# Region column for every year, otherwise rows are silently left out.
north_american_countries = ["Canada", "United States", "Mexico"]

# Keep only the cleaned rows whose Region is one of those countries
north_american_data = world_risk_index_cleaned.loc[
    world_risk_index_cleaned["Region"].isin(north_american_countries)
]

# One WRI-over-time line per country, without confidence bands
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(
    x="Year",
    y="WRI",
    hue="Region",
    data=north_american_data,
    ci=None,
    ax=ax,
)
ax.set_title("Trend of World Risk Index Over Time (North America)", fontweight='bold')
ax.set_xlabel("Year", fontweight='bold')
ax.set_ylabel("World Risk Index", fontweight='bold')
ax.legend(title="Country")
plt.show()


# Cleaned rows for Canada only
canada_data = world_risk_index_cleaned.loc[
    world_risk_index_cleaned["Region"].eq("Canada")
]

# Numeric score columns to chart. The leading space in the last entry is
# intentional: it matches the column label as it appears in the dataset.
metrics = [
    "WRI",
    "Exposure",
    "Vulnerability",
    "Susceptibility",
    "Lack of Coping Capabilities",
    " Lack of Adaptive Capacities",
]

# Draw every metric as its own labelled line on one shared set of axes
fig, ax = plt.subplots(figsize=(15, 10))
for metric in metrics:
    sns.lineplot(data=canada_data, x="Year", y=metric, label=metric, ax=ax)

ax.set_title("Trend of Various Metrics Over Time (Canada)", fontweight='bold')
ax.set_xlabel("Year", fontweight='bold')
ax.set_ylabel("Score", fontweight='bold')
ax.legend(title="Metric")
plt.show()

	Region	WRI	Exposure	Vulnerability	Susceptibility	Lack of Coping Capabilities	Lack of Adaptive Capacities	Year	Exposure Category	WRI Category	Vulnerability Category	Susceptibility Category
0	Vanuatu	32.00	56.33	56.81	37.14	79.34	53.96	2011	Very High	Very High	High	High
1	Tonga	29.08	56.04	51.90	28.94	81.80	44.97	2011	Very High	Very High	Medium	Medium
2	Philippinen	24.32	45.09	53.93	34.99	82.78	44.01	2011	Very High	Very High	High	High
3	Salomonen	23.51	36.40	64.60	44.11	85.95	63.74	2011	Very High	Very High	Very High	High
4	Guatemala	20.88	38.42	54.35	35.36	77.83	49.87	2011	Very High	Very High	High	High

Column Name	Description
Region	The region or country being assessed.
WRI	World Risk Index score.
Exposure	Exposure to natural disasters.
Vulnerability	Overall vulnerability to natural disasters.
Susceptibility	Susceptibility to natural disasters.
Lack of Coping Capabilities	The region's inability to cope with the effects of a disaster.
Lack of Adaptive Capacities	The region's inability to adapt to potential future disasters.
Year	The year of the assessment.
Exposure Category	Categorical representation of exposure (e.g., "Very High").
WRI Category	Categorical representation of the World Risk Index score.
Vulnerability Category	Categorical representation of vulnerability.
Susceptibility Category	Categorical representation of susceptibility.

Natural Disasters Analysis - World Risk Index

Authors: Women of the West Coast (WWC)

Date: Oct. 22, 2023

Table of Contents

Introduction

Data Processing

Data Cleaning

Step 1: Checking for Missing Values

Step 2: Handling Duplicate Entries

Global Analysis

Trend of World Risk Index Over Time

Top Regions with the Highest Vulnerability Scores

Natural Disasters vs. Lack of Coping Capabilities

Distribution of Regions by WRI Category (Global - Latest Year)

North American Analysis

Trend of World Risk Index Over Time (North America)

Canada-Specific Analysis

Trend of Various Metrics Over Time

Insights and Conclusion