Code
library(osmdata)
library(sf)
library(ggplot2)
library(tidyverse)
library(tidycensus)
library(tigris)
library(units)
library(kableExtra)
# Set Census API key
# The key is a credential, so it is read from the CENSUS_API_KEY environment
# variable (for example, set in ~/.Renviron) instead of being stored in the
# source. Any key that was previously committed here should be revoked and
# replaced.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "CENSUS_API_KEY is not set; add it to ~/.Renviron or the environment.",
    call. = FALSE
  )
}
census_api_key(census_key)
A Spatial Analysis of Urban Green Space Distribution
Zhongyu Zhou
December 4, 2024
In urban settings, public green spaces are essential because they improve residents’ physical and emotional well-being. With growing urbanization and mounting environmental pressures, how easily people can reach these spaces has a direct impact on quality of life. However, green space is often unevenly distributed within cities, which can leave some groups of residents with poorer access than others.
This study examines the accessibility and spatial distribution of public green spaces in Buffalo, New York. The main research questions are: 1. How accessible are public green spaces to Buffalo residents within a 400-meter (roughly 5-minute) walk? 2. How does green space accessibility vary across census tracts? 3. How does tract-level green space accessibility relate to median household income?
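This analysis integrates data from two primary sources: (1) OpenStreetMap, queried through the `osmdata` package for polygons tagged `leisure=park` or `leisure=garden`; and (2) the 2020 American Community Survey, retrieved through `tidycensus` for tract-level total population (B01003_001) and median household income (B19013_001) in Erie County. The Buffalo city boundary used to clip both layers is obtained with `tigris`.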
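The methodology follows these key steps: (1) download the city boundary, the green space polygons, and the tract-level demographics, and clip them to Buffalo; (2) for each census tract, compute an accessibility score from the green space that falls within a 400-meter buffer of the tract, weighted by green space size and distance and adjusted for population density; (3) map median income and accessibility by tract; and (4) compare accessibility across income quintiles.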
# Buffalo city limits: list the New York places with places(), keep the one
# named "Buffalo", and express it in WGS 84 (EPSG:4326).
buffalo_boundary <- local({
  ny_places <- places(state = "NY")
  buffalo_only <- filter(ny_places, NAME == "Buffalo")
  st_transform(buffalo_only, 4326)
})
# Query OpenStreetMap for features tagged leisure=park or leisure=garden
# inside the bounding box of the city boundary. The bounding box is a
# rectangle, so the result still has to be clipped to the city limits.
buffalo_bbox <- st_bbox(buffalo_boundary)
buffalo_green <- osmdata_sf(
  add_osm_feature(
    opq(buffalo_bbox),
    key = "leisure",
    value = c("park", "garden")
  )
)
# Keep the id and name of each green-space polygon, make sure it is in
# EPSG:4326, and clip it to the Buffalo boundary.
# NOTE(review): only `osm_polygons` is read; green spaces that the query
# returned under `osm_multipolygons` (if any) are not included - confirm this
# is intended.
green_spaces <- local({
  park_polygons <- select(buffalo_green$osm_polygons, osm_id, name)
  park_polygons <- st_transform(park_polygons, 4326)
  st_intersection(park_polygons, buffalo_boundary)
})
# Tract-level demographics for Erie County (2020 ACS) with geometry:
# total population and median household income, reshaped to one row per
# tract and clipped to the Buffalo boundary.
buffalo_pop <- local({
  acs_variables <- c(
    population = "B01003_001",
    median_income = "B19013_001"
  )

  # get_acs() returns one row per tract and variable (long format).
  acs_long <- get_acs(
    geography = "tract",
    variables = acs_variables,
    state = "NY",
    county = "Erie",
    year = 2020,
    geometry = TRUE
  )

  # Drop the margin of error so the estimates can be spread into columns.
  acs_estimates <- select(acs_long, -moe)
  acs_wide <- pivot_wider(
    acs_estimates,
    id_cols = c(GEOID, geometry),
    names_from = "variable",
    values_from = "estimate"
  )

  # Back to an sf object in EPSG:4326, then clip to the city limits.
  acs_sf <- st_transform(st_as_sf(acs_wide), 4326)
  st_intersection(acs_sf, buffalo_boundary)
})
#' Mean population density (people per km^2) across a set of tracts.
#'
#' Non-finite densities (zero-area geometries, missing population) are ignored
#' so that a single degenerate tract cannot turn the reference value into
#' Inf or NA.
#'
#' @param tracts An sf object with a numeric `population` column.
#' @return A single numeric value (NaN if no tract has a finite density).
mean_tract_density <- function(tracts) {
  area_km2 <- as.numeric(st_area(tracts)) / 1e6
  density <- tracts$population / area_km2
  mean(density[is.finite(density)])
}

#' Green space accessibility score for one census tract.
#'
#' The score is the green-space area found inside the buffered tract divided
#' by the tract area, multiplied by the mean of (size weight * distance
#' weight) over the reachable green spaces, divided by a population-density
#' factor, and capped at 1.
#'
#' @param tract A one-row sf object with a numeric `population` column.
#' @param green_spaces An sf object of green-space polygons.
#' @param buffer_distance Buffer around the tract, in metres (default 400).
#' @param mean_density Reference population density (people per km^2) used to
#'   scale the tract's own density. When `NULL` (the default) it is computed
#'   from the global `buffalo_pop`, as this function originally did; pass it
#'   explicitly to avoid recomputing it for every tract.
#' @return A numeric score: 0 when no green space is within reach or when the
#'   calculation fails (a warning is raised in that case), `NA_real_` when the
#'   tract has no positive area, otherwise a value no greater than 1.
calculate_accessibility <- function(tract, green_spaces, buffer_distance = 400,
                                    mean_density = NULL) {
  tryCatch({
    # Work in UTM zone 17N (EPSG:32617) so buffers, areas and distances are
    # expressed in metres.
    tract_proj <- st_transform(st_geometry(tract), 32617)
    green_spaces_proj <- st_transform(green_spaces, 32617)

    # Green spaces that touch the buffered tract.
    tract_buffer <- st_buffer(tract_proj, buffer_distance)
    hits <- unlist(st_intersects(tract_buffer, green_spaces_proj))
    if (length(hits) == 0) {
      return(0)
    }
    accessible_gs <- green_spaces_proj[hits, ]

    # A tract without positive area (e.g. a sliver left by clipping) has no
    # meaningful coverage ratio or density.
    tract_area <- as.numeric(st_area(tract_proj))
    if (!isTRUE(tract_area > 0)) {
      return(NA_real_)
    }
    population_density <- tract$population / (tract_area / 1e6) # per km^2

    # 1. Basic coverage.
    # NOTE(review): each green space is intersected separately and the buffer
    # is larger than the tract, so overlapping polygons are counted more than
    # once and this ratio can exceed 1 before the final cap.
    intersection <- st_intersection(tract_buffer, accessible_gs)
    coverage_ratio <- as.numeric(sum(st_area(intersection))) / tract_area

    # 2. Green space size weight, relative to the smallest reachable green
    # space that has a positive area (avoids dividing by zero).
    gs_sizes <- as.numeric(st_area(accessible_gs))
    positive_sizes <- gs_sizes[which(gs_sizes > 0)]
    if (length(positive_sizes) == 0) {
      return(0)
    }
    size_weight <- 1 + log1p(gs_sizes / min(positive_sizes))

    # 3. Distance weight: 1 at the tract itself, falling linearly to 0 at
    # `buffer_distance`.
    distances <- as.numeric(st_distance(tract_proj, accessible_gs))
    distance_weights <- pmax(0, 1 - distances / buffer_distance)

    # 4. Population density weight: the denser the tract relative to the
    # reference density, the lower the score for the same coverage.
    if (is.null(mean_density)) {
      mean_density <- mean_tract_density(buffalo_pop)
    }
    density_factor <- 1 + log1p(population_density / mean_density)

    weighted_score <- coverage_ratio *
      mean(size_weight * distance_weights) / density_factor

    # Cap at 1.
    min(1, weighted_score)
  }, error = function(e) {
    # Best effort: report the failure and score the tract as 0.
    warning(paste("calculation mistake:", conditionMessage(e)), call. = FALSE)
    0
  })
}

# Score every tract. The reference density is the same for all tracts, so it
# is computed once here instead of inside each call.
reference_density <- mean_tract_density(buffalo_pop)
tract_count <- nrow(buffalo_pop)
buffalo_pop$accessibility <- vapply(
  seq_len(tract_count),
  function(i) {
    if (i %% 10 == 0) {
      cat(sprintf("Processing tract %d of %d\n", i, tract_count))
    }
    calculate_accessibility(
      buffalo_pop[i, ],
      green_spaces,
      mean_density = reference_density
    )
  },
  numeric(1)
)
Our analysis reveals several key findings about green space accessibility in Buffalo:
# Choropleth of median household income by census tract. Tracts without an
# income estimate are drawn in grey.
ggplot(data = buffalo_pop) +
  geom_sf(mapping = aes(fill = median_income)) +
  scale_fill_viridis_c(
    option = "magma", # palette differs from the accessibility map
    name = "Median Income",
    labels = scales::dollar_format(),
    na.value = "grey50"
  ) +
  ggtitle(
    label = "Median Household Income Distribution in Buffalo",
    subtitle = "Based on Census Tract Level Data"
  ) +
  theme_minimal()
# Create accessibility map
# Tracts are filled by their accessibility score (0-1, shown as a percentage)
# and the green-space polygons are overlaid in translucent green.
ggplot() +
  geom_sf(data = buffalo_pop, aes(fill = accessibility)) +
  scale_fill_viridis_c(
    name = "Accessibility Score",
    limits = c(0, 1),
    labels = scales::percent,
    na.value = "grey50"
  ) +
  geom_sf(data = green_spaces, fill = "darkgreen", alpha = 0.3) +
  theme_minimal() +
  labs(
    title = "Green Space Accessibility in Buffalo",
    # The plotted value is the weighted, density-adjusted score, not a plain
    # share of area, so the subtitle describes it as such.
    subtitle = "Weighted coverage score for green space within a 400 m (5-minute walk) buffer"
  )
# Data cleaning and outlier handling
buffalo_pop_clean <- buffalo_pop %>%
  # Drop tracts without an income estimate, then trim the income tails
  # (values at or beyond the 1st / 99th percentile).
  filter(!is.na(median_income)) %>%
  filter(median_income > quantile(median_income, 0.01, na.rm = TRUE) &
    median_income < quantile(median_income, 0.99, na.rm = TRUE)) %>%
  # Trim extreme accessibility scores. The score is bounded to [0, 1], so many
  # tracts can share the exact value of a percentile (e.g. every tract scored
  # 0). The bounds are inclusive so that such ties are kept; a strict
  # comparison would drop all of them at once.
  filter(accessibility >= quantile(accessibility, 0.01, na.rm = TRUE) &
    accessibility <= quantile(accessibility, 0.99, na.rm = TRUE))
# Assign each cleaned tract to one of five income groups, using the 0/20/40/
# 60/80/100th percentiles of median income as the bin edges. The lowest edge
# is included so the poorest tract is not left unassigned.
buffalo_pop_clean <- mutate(
  buffalo_pop_clean,
  income_quintile = cut(
    x = median_income,
    breaks = quantile(
      median_income,
      probs = seq(0, 1, by = 0.2),
      na.rm = TRUE
    ),
    labels = c("Lowest 20%", "20-40%", "40-60%", "60-80%", "Highest 20%"),
    include.lowest = TRUE
  )
)
# Calculate the average accessibility for each income quantile
# The geometry is dropped first: summarising an sf object also merges the
# tract geometries of each group and carries a `geometry` column into the
# summary table, which is not wanted in a table of statistics.
quintile_summary <- buffalo_pop_clean %>%
  st_drop_geometry() %>%
  group_by(income_quintile) %>%
  summarise(
    mean_accessibility = mean(accessibility, na.rm = TRUE),
    median_accessibility = median(accessibility, na.rm = TRUE),
    n_tracts = n()
  )
# Scatter plot of accessibility against median income, one diamond per tract
# coloured by income quintile, with a dashed black loess trend line.
improved_income_plot <- ggplot(
  data = buffalo_pop_clean,
  mapping = aes(x = median_income, y = accessibility)
) +
  geom_point(
    mapping = aes(color = income_quintile),
    size = 3,
    shape = 18
  ) +
  geom_smooth(linetype = "dashed", color = "black", method = "loess") +
  # One colour per quintile, in factor-level order.
  scale_color_manual(
    values = c("#4B0082", "#4682B4", "#2E8B57", "#FFA500", "#DC143C")
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = scales::percent_format(accuracy = 1)
  ) +
  scale_x_continuous(
    breaks = scales::pretty_breaks(n = 8),
    labels = scales::dollar_format()
  ) +
  ggtitle(
    label = "Relationship between Income and Green Space Accessibility",
    subtitle = "With smoothed trend line"
  ) +
  xlab("Median Household Income") +
  ylab("Green Space Accessibility Score") +
  labs(color = "Income Quintile") +
  theme_minimal()

print(improved_income_plot)
income_quintile | mean_accessibility | median_accessibility | n_tracts | geometry |
---|---|---|---|---|
Lowest 20% | 0.3094663 | 0.2593383 | 15 | MULTIPOLYGON (((-78.86848 4... |
20-40% | 0.3363453 | 0.3295316 | 15 | MULTIPOLYGON (((-78.82441 4... |
40-60% | 0.3320864 | 0.1718203 | 14 | MULTIPOLYGON (((-78.90868 4... |
60-80% | 0.2856320 | 0.2191314 | 15 | MULTIPOLYGON (((-78.86808 4... |
Highest 20% | 0.2639786 | 0.1719957 | 15 | MULTIPOLYGON (((-78.88936 4... |
The results show a complex pattern of green space accessibility in Buffalo:
The spatial distribution is unequal: the central and southeastern parts of the city have lower accessibility, while the northeastern and mid-western parts have higher accessibility.
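The link between accessibility and income is non-linear rather than steadily increasing or decreasing: by median score, middle-income (40-60%) tracts have the least accessibility (about 0.17, essentially tied with the highest-income quintile), while by mean score the highest-income quintile is lowest (about 0.26) and the 20-60% range is highest (about 0.33).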
Accessibility is worse in densely populated areas and better around large parks.
Our analysis, which takes into account population density and spatial coverage, shows intricate patterns in green space accessibility across income levels. Future studies should examine other socioeconomic characteristics, road networks, and the quality of green spaces. These findings can inform urban planning decisions aimed at distributing green space more fairly among all communities.