library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
getwd()
## [1] "/Users/tyan"
df <- read_delim("Downloads/ARTH_data.xlsx - Sheet1.csv")
## New names:
## Rows: 336 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (22): Seattle, ART SITE, ARTIST, ART TITLE, ART TYPE, MATERIALS, MEASURE... dbl
## (2): LATITUDE, LONGITUDE
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...24`
dim(df)
## [1] 336 24
df
## # A tibble: 336 × 24
## Seattle `ART SITE` ARTIST `ART TITLE` `ART TYPE` MATERIALS MEASUREMENTS
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Seattle Adams Elementar… Hamme… "\"Guardia… Painting Gouache … 22 1/8 in x…
## 2 Seattle Adams Elementar… MacKa… "\"Sunflow… Photograph Silver d… 20 in x 24 …
## 3 Seattle Adams Elementar… MacKa… "\"Lisiant… Photograph Silver d… 20 in x 24 …
## 4 Seattle Arbor Heights E… Carl … "\"Puget S… Sculpture Glass mo… 8 ft 4 in x…
## 5 Seattle Ballard High Sc… Katz,… "\"Safety … Sculpture Painted … 34 in x 12 …
## 6 Seattle Ballard High Sc… Knigh… "\"Diva\"" Print Serigrap… 28 in x 23 …
## 7 Seattle Ballard High Sc… Lawre… "\"Windows… Print Offset l… 21 3/4 in x…
## 8 Seattle Ballard High Sc… Moila… "\"Olympic… Painting Oil pain… 31 in 6 in …
## 9 Seattle Ballard High Sc… Moila… "\"Gillnet… Painting Oil pain… 3 ft x 6 ft
## 10 Seattle Bryn Mawr Eleme… Lawre… "\"Builder… Print Offset l… 30 in x 21 …
## # ℹ 326 more rows
## # ℹ 17 more variables: `DATE MADE` <chr>, LOCATION <chr>, ADDRESS <chr>,
## # `ZIP CODE` <chr>, `SEE MORE AT` <chr>, LATITUDE <dbl>, LONGITUDE <dbl>,
## # GEOCOORDINATES <chr>, `ID NUMBER` <chr>, `CREDIT LINE` <chr>,
## # AGE_RANGE <chr>, `ASSOCIATED GROUP/COMMUNITY` <chr>, `SITE TYPE` <chr>,
## # `THEMES/SUBJECT` <chr>, `ADDITIONAL NOTES ON CONTENT` <chr>,
## # `IS ARTIST LOCAL?` <chr>, ...24 <chr>
df_clean <- df %>%
select(SiteType = `SITE TYPE`, Theme = `THEMES/SUBJECT`, IsLocal = `IS ARTIST LOCAL?`) %>%
drop_na() %>%
mutate(Theme = str_trim(Theme)) %>%
separate_rows(Theme, sep = ",\\s*") # separate on comma + optional space
# Count themes per site type
theme_counts <- df_clean %>%
group_by(SiteType, Theme) %>%
tally(name = "Count")
# Get most common theme per site type
most_common_themes <- theme_counts %>%
group_by(SiteType) %>%
filter(Count == max(Count))
# Bar graph: Site type on x-axis, fill by Theme
ggplot(most_common_themes, aes(x = reorder(SiteType, -Count), y = Count, fill = Theme)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(title = "Most Common Theme by Site Type",
x = "Site Type",
y = "Number of Artworks",
fill = "Theme") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Clean and prepare data
df_local <- df %>%
select(SiteType = `SITE TYPE`, Theme = `THEMES/SUBJECT`, IsLocal = `IS ARTIST LOCAL?`) %>%
drop_na() %>%
mutate(Theme = str_trim(Theme)) %>%
separate_rows(Theme, sep = ",\\s*") # separate on comma + optional space
# Count themes by local/non-local
theme_by_local <- df_local %>%
group_by(IsLocal, Theme) %>%
tally(name = "Count")
# Bar graph: Theme on x-axis, fill by Local/Non-local
ggplot(theme_by_local, aes(x = reorder(Theme, -Count), y = Count, fill = IsLocal)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
labs(title = "Themes by Artist Locality",
x = "Theme",
y = "Number of Artworks",
fill = "Is Local?") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

df_sep <- df %>%
select(IsLocal = `IS ARTIST LOCAL?`, Theme = `THEMES/SUBJECT`) %>%
drop_na() %>%
mutate(Theme = str_trim(Theme)) %>%
separate_rows(Theme, sep = ",\\s*")
# Convert to uppercase for consistent matching
df_sep <- df_sep %>%
mutate(Theme = toupper(Theme))
# Count total artists in each group (local/non-local)
total_by_locality <- df_sep %>%
distinct(IsLocal, row_number()) %>% # approximate unique entries
count(IsLocal, name = "TotalArtists")
# Count artists who did PNW-themed work
pnw_by_locality <- df_sep %>%
filter(str_detect(Theme, "PNW")) %>%
count(IsLocal, name = "PNW_Artists")
# Join and calculate percentage
pnw_percent <- left_join(pnw_by_locality, total_by_locality, by = "IsLocal") %>%
mutate(PercentPNW = round(100 * PNW_Artists / TotalArtists, 1))
print(pnw_percent)
## # A tibble: 2 × 4
## IsLocal PNW_Artists TotalArtists PercentPNW
## <chr> <int> <int> <dbl>
## 1 No 12 352 3.4
## 2 Yes 34 358 9.5