---
title: "Stats 140 Project"
author: "Gavin Cardeno"
output: html_document
date: "2025-03-06"
---

# The Relationship Between GDP, Unemployment, Inflation, and Economic Growth: A Global Economic Analysis (2010-2025)


```{r}
library(ggplot2)
library(dplyr)
library(corrplot)
library(tidyverse)

# Read the indicator table from the local download location.
data <- read.csv(
  "/Users/gavcard/Downloads/archive (1)/Economic Indicators And Inflation.csv"
)

# Replace the file's own headers with short names, assigned by position
# (this presumes the CSV has exactly these six columns in this order -- TODO confirm).
names(data) <- c("Country", "Year", "GDP", "Inflation", "Unemployment", "EconGrowth")
data[["Year"]] <- as.numeric(data[["Year"]])

# Total number of missing cells in the whole table; 0 means nothing is missing.
sum(is.na(data))

# Per-country mean of each indicator, ignoring missing values. The result is
# printed, not stored.
data %>%
  group_by(Country) %>%
  summarise(
    across(
      c(GDP, Inflation, Unemployment, EconGrowth),
      function(x) mean(x, na.rm = TRUE),
      .names = "Avg_{.col}"
    )
  )
# Beginning of EDA

# Correlation matrix of the four numeric indicators. Rows with a missing value
# in any of the four columns are dropped (use = "complete.obs"). The matrix is
# computed once here and reused by the heat map below, so the printed table and
# the plot are always built from the same numbers.
indicator_cols <- c("GDP", "Inflation", "Unemployment", "EconGrowth")
cor_matrix <- cor(data[, indicator_cols], use = "complete.obs")
print(cor_matrix)

# Scatter plots for pairwise relations, each with a fitted straight line
# (geom_smooth with method = "lm").

ggplot(data, aes(x = GDP, y = EconGrowth)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "GDP vs. Economic Growth")

ggplot(data, aes(x = Inflation, y = Unemployment)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Inflation vs. Unemployment")

ggplot(data, aes(x = Inflation, y = EconGrowth)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Inflation vs. Economic Growth")

# Correlation heat map: lower triangle only, cells shaded by coefficient.
corrplot(cor_matrix, method = "color", type = "lower", tl.cex = 0.8)

# Modeling & Analysis part

# Linear regression of economic growth on GDP, inflation and unemployment.
# Every row of `data` enters a single pooled fit; Country and Year are not
# part of the formula.
model <- lm(EconGrowth ~ GDP + Inflation + Unemployment, data = data)
summary(model)

# Time series analysis: GDP over time, one coloured line (with points) per
# country.
ggplot(data, aes(x = Year, y = GDP, color = Country)) +
  geom_line() +
  geom_point() +
  labs(title = "GDP over Time by Country", x = "Year", y = "GDP (in Billion USD)")

# Forecast for the average GDP of all countries.
# Step 1: mean GDP per year across countries, ignoring missing values.
avg_gdp <- data %>%
  group_by(Year) %>%
  summarise(avg_GDP = mean(GDP, na.rm = TRUE))

# Step 2: annual series starting at the earliest year.
# NOTE(review): ts() assumes one observation per consecutive year -- confirm
# that no year is missing from the data.
avg_gdp_ts <- ts(avg_gdp$avg_GDP, start = min(avg_gdp$Year), frequency = 1)

# Step 3: fit an ARIMA model and project five years ahead. auto.arima() and
# forecast() come from the forecast package, which this file does not attach
# with library(), so they are called through the namespace; otherwise the
# chunk stops with "could not find function".
fit <- forecast::auto.arima(avg_gdp_ts)

forecast_values <- forecast::forecast(fit, h = 5)
plot(forecast_values, main = "Forecast of Average GDP for Next 5 Years")
```