Of 352 studies, despite ~80% sharing data, only ~20% shared code
…but maybe it’s just hard to change…
# Map of bandicoot (Perameles) record counts per state for 2003.
# Dense, unformatted version of the script; the tidied version follows below.
library(tidyverse)
library(galah)
# NOTE: absolute path to one user's machine. Forward slashes are required:
# backslashes inside an R string are escape sequences, and "\U" without hex
# digits is a parse error.
setwd("C:/Users/KEL329/OneDrive - CSIRO/Documents/ALA/Talks/ESA2024")
galah_config(email = "dax.kellie@csiro.au")
# download records, keep selected columns, then count records per state
# (cl22 is renamed to `state`); rows with a missing state are dropped
alaData <- galah_call()|>identify("perameles")|>filter(year == 2003) |>
select(group="basic",cl22)|>
atlas_occurrences()|>
select(recordID,scientificName,decimalLongitude,decimalLatitude,eventDate,cl22) |>
janitor::clean_names()|>rename(state=cl22) |>
mutate(event_date=lubridate::ymd(event_date)) |>
group_by(state) |>count() |>drop_na()
# join counts onto the state polygons; states without records get n = 0
mmap <- ozmaps::ozmap_states |>
sf::st_transform(crs=4326) |>
left_join(alaData, join_by(NAME==state)) |>
replace_na(list(n=0))
# choropleth of counts
ggplot() + geom_sf(data=mmap,aes(fill=n),colour="grey60")+
viridis::scale_fill_viridis(option="F",begin=0.2,direction=-1)+theme_void()+theme(legend.position="right")
# Title: Map - number of bandicoot observations
# Author: Dax Kellie
# Date: 2024-11-16

# NOTE: absolute path to one user's machine. Forward slashes are required:
# backslashes inside an R string are escape sequences, and "\U" without hex
# digits is a parse error.
setwd("C:/Users/KEL329/OneDrive - CSIRO/Documents/ALA/Talks/ESA2024")

# packages
library(tidyverse)
library(galah)
library(janitor)
library(sf)
library(ozmaps)

# email used by galah for occurrence downloads
galah_config(email = "dax.kellie@csiro.au")

# map of Australia (states & territories)
aus <- ozmap_states |>
  st_transform(crs = 4326) # set CRS to WGS84 (EPSG:4326)

# download bandicoot records
bandicoots <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences()

# keep needed columns, rename column, fix date class
# (the result must be assigned: `bandicoots_cleaned` is used below)
bandicoots_cleaned <- bandicoots |>
  select(recordID, scientificName, decimalLongitude,
         decimalLatitude, eventDate, cl22) |>
  janitor::clean_names() |>
  rename(state = cl22) |>
  mutate(
    event_date = lubridate::ymd(event_date)
  )

# counts by state/territory (rows with a missing state are dropped)
state_counts <-
  bandicoots_cleaned |>
  group_by(state) |>
  count() |>
  drop_na()

# join map with counts; states without records get n = 0
aus_counts <-
  aus |>
  left_join(state_counts, join_by(NAME == state)) |>
  replace_na(list(n = 0))

# Map
ggplot() +
  geom_sf(data = aus_counts,
          aes(fill = n),
          colour = "grey60") +
  viridis::scale_fill_viridis(option = "F",
                              begin = 0.2,
                              direction = -1) +
  theme_void() +
  theme(legend.position = "right")
This code reads better, but it’s no more reproducible!
Code reproducibility depends on a reproducible work environment
.Rproj
files to tell R where your project’s top-level folder is
An online platform for storing project repositories
GitHub is useful for reasons other than collaborative code writing, too!
To get set up:
use_git()
+ use_github()
can initialise & link a local directory to a GitHub repository and it’s fast
Jenny Bryan’s file name holy trinity
Bad:
dat2024_bsrFinalDK-new.csv
script.R
Good:
2024-11-16_bandicoots.csv
map_counts-by-state.R
In order of comprehensiveness/ease:
{renv}
init()
, snapshot()
, restore()
{groundhog}
groundhog.library(pkg-name, date)
sessionInfo()
sessionInfo() |> report::report()
Locally & online
Zenodo, Open Science Framework
Generate a DOI for your data
[1] "https://doi.org/10.26197/ala.78e21acd-7516-4fb2-91fe-9b86f5fcd83b"
# Title: Map - number of bandicoot observations
# Author: Dax Kellie
# Date: 2024-11-16

# NOTE: absolute path to one user's machine. Forward slashes are required:
# backslashes inside an R string are escape sequences, and "\U" without hex
# digits is a parse error.
setwd("C:/Users/KEL329/OneDrive - CSIRO/Documents/ALA/Talks/ESA2024")

# packages
library(tidyverse)
library(galah)
library(janitor)
library(sf)
library(ozmaps)

# email used by galah for occurrence downloads
galah_config(email = "dax.kellie@csiro.au")

# map of Australia (states & territories)
aus <- ozmap_states |>
  st_transform(crs = 4326) # set CRS to WGS84 (EPSG:4326)

# download bandicoot records
bandicoots <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences()

# keep needed columns, rename column, fix date class
# (the result must be assigned: `bandicoots_cleaned` is used below)
bandicoots_cleaned <- bandicoots |>
  select(recordID, scientificName, decimalLongitude,
         decimalLatitude, eventDate, cl22) |>
  janitor::clean_names() |>
  rename(state = cl22) |>
  mutate(
    event_date = lubridate::ymd(event_date)
  )

# counts by state/territory (rows with a missing state are dropped)
state_counts <-
  bandicoots_cleaned |>
  group_by(state) |>
  count() |>
  drop_na()

# join map with counts; states without records get n = 0
aus_counts <-
  aus |>
  left_join(state_counts, join_by(NAME == state)) |>
  replace_na(list(n = 0))

# Map
ggplot() +
  geom_sf(data = aus_counts,
          aes(fill = n),
          colour = "grey60") +
  viridis::scale_fill_viridis(option = "F",
                              begin = 0.2,
                              direction = -1) +
  theme_void() +
  theme(legend.position = "right")
# Title: Map - number of bandicoot observations
# Author: Dax Kellie
# Date: 2024-11-16

# packages
library(tidyverse)
library(galah)
library(janitor)
library(sf)
library(ozmaps)

# email used by galah for occurrence downloads
galah_config(email = "dax.kellie@csiro.au")

# map of Australia (states & territories)
aus <- ozmap_states |>
  st_transform(crs = 4326) # set CRS to WGS84 (EPSG:4326)

# download bandicoot records
bandicoots <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences()

# keep needed columns, rename column, fix date class
# (the result must be assigned: `bandicoots_cleaned` is used below)
bandicoots_cleaned <- bandicoots |>
  select(recordID, scientificName, decimalLongitude,
         decimalLatitude, eventDate, cl22) |>
  janitor::clean_names() |>
  rename(state = cl22) |>
  mutate(
    event_date = lubridate::ymd(event_date)
  )

# counts by state/territory (rows with a missing state are dropped)
state_counts <-
  bandicoots_cleaned |>
  group_by(state) |>
  count() |>
  drop_na()

# join map with counts; states without records get n = 0
aus_counts <-
  aus |>
  left_join(state_counts, join_by(NAME == state)) |>
  replace_na(list(n = 0))

# Map
ggplot() +
  geom_sf(data = aus_counts,
          aes(fill = n),
          colour = "grey60") +
  viridis::scale_fill_viridis(option = "F",
                              begin = 0.2,
                              direction = -1) +
  theme_void() +
  theme(legend.position = "right")
Making code reproducible depends on making a reproducible working environment
- R projects
- GitHub
- Organised folder
- Well-named files
- Document package versions
- Data stored locally & online
When 174 analyst teams were asked to use 2 datasets to answer 2 ecology/evolution questions, results were all over the grid.
Dax Kellie
Data Analyst & Science Lead
Science & Decision Support | ALA
: dax.kellie@csiro.au
: @daxkellie
Science & Decision Support team
Martin Westgate, Shandiya Balasubramaniam
Olivia Torresan, Juliet Seers, Amanda Buyan
These slides were made using Quarto & RStudio
Slides: