60HN

1 Constants

# Folder that holds the study workbook. This is a machine-specific absolute
# path; the trailing slash is kept because the file name is appended to it
# as-is when the full path is built.
path2data <- "/Users/MarcChoisy/Library/CloudStorage/OneDrive-OxfordUniversityClinicalResearchUnit/GitHub/choisy/60HN/"
# Name of the Excel workbook to read from that folder.
data_file <- "25-7-2025-_60HN_PATIENT_P1_Data.xlsx"

2 Packages

library(readxl)
library(purrr)
library(dplyr)
library(lubridate)

3 Data

Reading the data:

# Full path to the Excel workbook.
file <- paste0(path2data, data_file)
# Names of all the worksheets of the workbook, except the last one.
sheets <- head(excel_sheets(file), n = -1)

# Read every selected worksheet into a named list of tibbles (one element per
# sheet), then de-duplicate each table so that every USUBJID keeps a single
# record: the one with the highest `entry` value.
data60HN <- sheets |>
  map(\(sheet) read_excel(path = file, sheet = sheet)) |>
  setNames(sheets) |>
  map(\(records) {
    records |>
      group_by(USUBJID) |>
      group_modify(\(subject_records, key) {
        subject_records |>
          arrange(desc(entry)) |>
          # slice_head() is a row verb on every dplyr version, whereas first()
          # on a data frame only returns a row from dplyr 1.1.0 onwards.
          slice_head(n = 1)
      }) |>
      ungroup()
  })

4 Durations of stay

# Combine the separate date and time columns of the SCR table into single
# admission and discharge date-times, then drop the four source columns.
# NOTE(review): the rendered output reports that 97 discharge values fail to
# parse; paste() turns a missing value into the string "NA", so these may
# simply be records without discharge information -- to be confirmed.
data60HN$SCR |>
  mutate(
    SPEC_ADMISSION = as_datetime(
      paste(SPEC_DATE_ADMISSION, SPEC_TIME_ADMISSION)
    ),
    SPEC_DISCHARGE = as_datetime(
      paste(SPEC_DATE_DISCHARGE, SPEC_TIME_DISCHARGE)
    )
  ) |>
  select(
    !c(
      SPEC_DATE_ADMISSION, SPEC_TIME_ADMISSION,
      SPEC_DATE_DISCHARGE, SPEC_TIME_DISCHARGE
    )
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `SPEC_DISCHARGE = as_datetime(paste(SPEC_DATE_DISCHARGE,
  SPEC_TIME_DISCHARGE))`.
Caused by warning:
!  97 failed to parse.
# A tibble: 2,006 × 17
   USUBJID    EVENT STUDYID    SITEID SUBJID INCLUSION_1 INCLUSION_2 INCLUSION_3
   <chr>      <chr> <chr>      <chr>  <chr>  <lgl>       <lgl>       <lgl>      
 1 008-1-1-01 All   60HN_PATI… 008    1-1-01 TRUE        TRUE        TRUE       
 2 008-1-1-02 All   60HN_PATI… 008    1-1-02 TRUE        TRUE        TRUE       
 3 008-1-1-03 All   60HN_PATI… 008    1-1-03 TRUE        TRUE        TRUE       
 4 008-1-1-04 All   60HN_PATI… 008    1-1-04 TRUE        TRUE        TRUE       
 5 008-1-1-05 All   60HN_PATI… 008    1-1-05 TRUE        TRUE        TRUE       
 6 008-1-1-06 All   60HN_PATI… 008    1-1-06 TRUE        TRUE        TRUE       
 7 008-1-1-07 All   60HN_PATI… 008    1-1-07 TRUE        TRUE        TRUE       
 8 008-1-1-08 All   60HN_PATI… 008    1-1-08 TRUE        TRUE        TRUE       
 9 008-1-1-09 All   60HN_PATI… 008    1-1-09 TRUE        TRUE        TRUE       
10 008-1-1-10 All   60HN_PATI… 008    1-1-10 TRUE        TRUE        TRUE       
# ℹ 1,996 more rows
# ℹ 9 more variables: EXCLUSION <chr>, SPECID_ADMISSION <chr>,
#   SPECID_DISCHARGE <chr>, SPEC_DISCHARGE_REASON <chr>,
#   SPEC_DISCHARGE_REASON_SPECIFY <chr>, entry <dbl>, enteredtime <chr>,
#   SPEC_ADMISSION <dttm>, SPEC_DISCHARGE <dttm>