Of 352 studies, ~80% shared data but only ~20% shared code
…but maybe it’s just hard to change…
library(tidyverse)
library(galah)
# Set the working directory to the talk's folder.
# Forward slashes are used because a backslash inside an R string starts an
# escape sequence ("\U", "\K", "\O", ...), so the backslash form fails to parse.
# NOTE(review): this is an absolute, machine-specific path.
setwd("C:/Users/KEL329/OneDrive - CSIRO/Documents/ALA/Talks/ESA2024")
galah_config(email = "dax.kellie@csiro.au")
# Request 2003 Perameles records (basic fields plus the cl22 layer), keep the
# columns of interest, tidy the names and date, then count records per state
alaData <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences() |>
  select(
    recordID, scientificName, decimalLongitude,
    decimalLatitude, eventDate, cl22
  ) |>
  janitor::clean_names() |>
  rename(state = cl22) |>
  mutate(event_date = lubridate::ymd(event_date)) |>
  group_by(state) |>
  count() |>
  drop_na()
# State/territory polygons from {ozmaps} in EPSG:4326, joined to the counts
# (NAME on the map matches state in the counts); missing counts become 0
mmap <- sf::st_transform(ozmaps::ozmap_states, crs = 4326) |>
  left_join(alaData, by = join_by(NAME == state)) |>
  replace_na(list(n = 0))
# Map of record counts per state/territory
ggplot() +
  geom_sf(data = mmap, mapping = aes(fill = n), colour = "grey60") +
  viridis::scale_fill_viridis(option = "F", begin = 0.2, direction = -1) +
  theme_void() +
  theme(legend.position = "right")
# Title: Map - number of bandicoot observations
# Author: Dax Kellie
# Date: 2024-11-16
# Set the working directory to the talk's folder.
# Forward slashes are used because a backslash inside an R string starts an
# escape sequence ("\U", "\K", "\O", ...), so the backslash form fails to parse.
# NOTE(review): this is an absolute, machine-specific path.
setwd("C:/Users/KEL329/OneDrive - CSIRO/Documents/ALA/Talks/ESA2024")
# packages
library(tidyverse)
library(galah)
library(janitor)
library(sf)
library(ozmaps)
galah_config(email = "dax.kellie@csiro.au")
# map of Australia: state/territory polygons from {ozmaps},
# reprojected to EPSG:4326
aus <- st_transform(ozmap_states, crs = 4326)
# download bandicoot records
# Requests Perameles records from 2003 with the "basic" field group plus the
# cl22 layer (renamed to `state` in the cleaning step).
bandicoots <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences()

# filter data, rename column, fix date class
# The result must be stored: the counting step reads `bandicoots_cleaned`,
# and without this assignment that object would not exist.
bandicoots_cleaned <- bandicoots |>
  select(
    recordID, scientificName, decimalLongitude,
    decimalLatitude, eventDate, cl22
  ) |>
  janitor::clean_names() |> # eventDate becomes event_date
  rename(state = cl22) |>
  mutate(event_date = lubridate::ymd(event_date))
# number of records per state/territory; rows containing NA are dropped
state_counts <- drop_na(
  count(group_by(bandicoots_cleaned, state))
)

# attach the counts to the map polygons (NAME on the map matches state in
# the counts); polygons without a matching count get n = 0 instead of NA
aus_counts <- aus |>
  left_join(state_counts, by = join_by(NAME == state)) |>
  replace_na(list(n = 0))
# Map of record counts per state/territory
ggplot() +
  geom_sf(data = aus_counts, mapping = aes(fill = n), colour = "grey60") +
  viridis::scale_fill_viridis(option = "F", begin = 0.2, direction = -1) +
  theme_void() +
  theme(legend.position = "right")
This code reads better, but it’s no more reproducible!
Code reproducibility depends on a reproducible work environment
.Rproj files tell R where your project’s top-level folder is
An online platform for storing project repositories
GitHub is useful for reasons other than collaborative code writing, too!
To get set up:
use_git() + use_github() can initialise & link a local directory to a GitHub repository and it’s fast
Jenny Bryan’s file name holy trinity
Bad:
dat2024_bsrFinalDK-new.csv
script.R
Good:
2024-11-16_bandicoots.csv
map_counts-by-state.R
In order of comprehensiveness/ease:
{renv}
init(), snapshot(), restore()
{groundhog}
groundhog.library(pkg-name, date)
sessionInfo()
sessionInfo() |> report::report()
Locally & online
Zenodo, Open Science Framework
Generate a DOI for your data
[1] "https://doi.org/10.26197/ala.78e21acd-7516-4fb2-91fe-9b86f5fcd83b"
# Title: Map - number of bandicoot observations
# Author: Dax Kellie
# Date: 2024-11-16
# Set the working directory to the talk's folder.
# Forward slashes are used because a backslash inside an R string starts an
# escape sequence ("\U", "\K", "\O", ...), so the backslash form fails to parse.
# NOTE(review): this is an absolute, machine-specific path.
setwd("C:/Users/KEL329/OneDrive - CSIRO/Documents/ALA/Talks/ESA2024")
# packages
library(tidyverse)
library(galah)
library(janitor)
library(sf)
library(ozmaps)
galah_config(email = "dax.kellie@csiro.au")
# map of Australia: state/territory polygons from {ozmaps},
# reprojected to EPSG:4326
aus <- st_transform(ozmap_states, crs = 4326)
# download bandicoot records
# Requests Perameles records from 2003 with the "basic" field group plus the
# cl22 layer (renamed to `state` in the cleaning step).
bandicoots <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences()

# filter data, rename column, fix date class
# The result must be stored: the counting step reads `bandicoots_cleaned`,
# and without this assignment that object would not exist.
bandicoots_cleaned <- bandicoots |>
  select(
    recordID, scientificName, decimalLongitude,
    decimalLatitude, eventDate, cl22
  ) |>
  janitor::clean_names() |> # eventDate becomes event_date
  rename(state = cl22) |>
  mutate(event_date = lubridate::ymd(event_date))
# number of records per state/territory; rows containing NA are dropped
state_counts <- drop_na(
  count(group_by(bandicoots_cleaned, state))
)

# attach the counts to the map polygons (NAME on the map matches state in
# the counts); polygons without a matching count get n = 0 instead of NA
aus_counts <- aus |>
  left_join(state_counts, by = join_by(NAME == state)) |>
  replace_na(list(n = 0))
# Map of record counts per state/territory
ggplot() +
  geom_sf(data = aus_counts, mapping = aes(fill = n), colour = "grey60") +
  viridis::scale_fill_viridis(option = "F", begin = 0.2, direction = -1) +
  theme_void() +
  theme(legend.position = "right")
# Title: Map - number of bandicoot observations
# Author: Dax Kellie
# Date: 2024-11-16
# packages
library(tidyverse)
library(galah)
library(janitor)
library(sf)
library(ozmaps)
galah_config(email = "dax.kellie@csiro.au")
# map of Australia: state/territory polygons from {ozmaps},
# reprojected to EPSG:4326
aus <- st_transform(ozmap_states, crs = 4326)
# download bandicoot records
# Requests Perameles records from 2003 with the "basic" field group plus the
# cl22 layer (renamed to `state` in the cleaning step).
bandicoots <- galah_call() |>
  identify("perameles") |>
  filter(year == 2003) |>
  select(group = "basic", cl22) |>
  atlas_occurrences()

# filter data, rename column, fix date class
# The result must be stored: the counting step reads `bandicoots_cleaned`,
# and without this assignment that object would not exist.
bandicoots_cleaned <- bandicoots |>
  select(
    recordID, scientificName, decimalLongitude,
    decimalLatitude, eventDate, cl22
  ) |>
  janitor::clean_names() |> # eventDate becomes event_date
  rename(state = cl22) |>
  mutate(event_date = lubridate::ymd(event_date))
# number of records per state/territory; rows containing NA are dropped
state_counts <- drop_na(
  count(group_by(bandicoots_cleaned, state))
)

# attach the counts to the map polygons (NAME on the map matches state in
# the counts); polygons without a matching count get n = 0 instead of NA
aus_counts <- aus |>
  left_join(state_counts, by = join_by(NAME == state)) |>
  replace_na(list(n = 0))
# Map of record counts per state/territory
ggplot() +
  geom_sf(data = aus_counts, mapping = aes(fill = n), colour = "grey60") +
  viridis::scale_fill_viridis(option = "F", begin = 0.2, direction = -1) +
  theme_void() +
  theme(legend.position = "right")
Making code reproducible depends on making a reproducible working environment
- R projects
- GitHub
- Organised folder
- Well-named files
- Document package versions
- Data stored locally & online
When 174 analyst teams were asked to use 2 datasets to answer 2 ecology/evolution questions, results were all over the grid.
 Dax Kellie
Data Analyst & Science Lead
Science & Decision Support | ALA 
: dax.kellie@csiro.au
 
 : @daxkellie 
 
 Science & Decision Support team
 Martin Westgate, Shandiya Balasubramaniam
 Olivia Torresan, Juliet Seers, Amanda Buyan
These slides were made using Quarto & RStudio
Slides: