Oxidative Stress

Analyzing the effect of Condition (Hypoxia vs Normoxia) on a Oxidative Stress gene panel

Setup

source(here::here("src", "setup.R"), echo = FALSE)

## Since downlit doesn't seem to link packages without an explicit & visible library() call anymore ...

library(here)
library(pipebind)

library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
library(ggplot2)

library(glmmTMB)
library(parameters)
library(insight)
library(performance)
library(emmeans)

library(DT)
library(plotly)

## Data

supplementary_data <- load_supplementary_data()

OS_data <- load_pcr_data("OS")

1 Clean Data

Here’s the data from our Oxidative Stress gene panel, which has been cleaned by:

Removing missing DCq values
Removing genes missing either N or IH Condition
Removing genes having less than 3 data points in either the N or IH Condition

2 Model fitting & diagnostics

2.1 Model fitting

Let’s define the model we will fit to each Gene’s DCq data.

Here, we will fit a Linear Mixed Effect Model, with a random effect per Experiment, to account for potential clustering:

\[ \begin{aligned} DCq_{i} &\sim N \left(\alpha_{j[i]} + \beta_{1}(Condition), \sigma^2 \right) \\ \alpha_{j} &\sim N \left(\mu_{\alpha_{j}}, \sigma^2_{\alpha_{j}} \right) \text{, for Experiment j = 1,} \dots \text{,J} \end{aligned} \]

OS_model <- function(data) {
  form <- "DCq ~ Condition"
  
  # If the data contains an Experiment variable with more than two levels, add a random effect by Experiment
  if ("Experiment" %in% colnames(data) && n_distinct(data$Experiment) >= 2) 
    form <- str_c(form, " + (1 | Experiment)")

  glmmTMB(as.formula(form), family = gaussian("identity"), data = data, contrasts = list(Condition = "contr.sum"))
}

Now, let’s fit said model to each Gene’s data, for a given Stage and Layer:

compute_fold_change

compute_fold_change <- function(mod) {
  return(
    get_data(mod)
    |> select(Condition, DCq)
    |> pivot_wider(names_from = Condition, values_from = DCq, values_fn = mean)
    |> summarize(Fold = 2**(-1 * (IH - N)))
    |> pull(Fold)
  )
}

(OS_data$models <- OS_data$clean
  |> group_split(Stage, Layer, Gene)
  |> map_dfr(
    \(d) summarize(d, Mod = pick(Experiment, Condition, DCq) |> OS_model() |> list(), .by = c(Stage, Layer, Gene)), 
    .progress = "Fitting models:"
  )
  |> filter(!has_na_coefs(Mod)) # Removing models that did not fit properly
  |> mutate(Fold = map_dbl(Mod, compute_fold_change)) # Adding the Fold change
  |> select(Stage, Layer, Gene, Fold, Mod)
)

2.2 Model diagnostics

Here are the diagnostic plots for a random sample of the fitted models:

3 Model analysis

For each model we fit, we can then extract the CI and p.value for the relevant contrasts, and use those to establish if a Gene was up or down-regulated:

get_emmeans_data

get_emmeans_data <- function(mod) {
  return(
    emmeans(mod, specs = "Condition", type = "response")
    |> contrast(method = "pairwise", adjust = "none", infer = TRUE)
    |> as.data.frame()
    |> pivot_wider(names_from = contrast, values_from = estimate)
    |> select(last_col(), LCB = lower.CL, UCB = upper.CL, p.value)
    |> mutate(across(where(is.character), \(x) na_if(x, "NaN")))
  )
}

get_regulation_type

get_regulation_type <- function(fold, p_value) {
  case_when(
    p_value <= .05 & fold < 1 ~ regulation_type$DOWNREG,
    p_value <= .05 & fold > 1 ~ regulation_type$UPREG,
    is.na(p_value) | is.na(fold) ~ NA_character_,
    .default = regulation_type$NOT_REG
  )
}

(OS_data$predictions <- OS_data$models
  |> group_split(Stage, Layer, Gene)
  |> map_dfr(\(d) mutate(d, get_emmeans_data(Mod[[1]])), .progress = "Extracting model predictions:")
  |> filter(!is.na(p.value))
  |> mutate(Expression = get_regulation_type(Fold, p.value))
  |> select(Stage, Layer, Gene, Fold, Expression, matches("-|/"), LCB, UCB, p.value)
)

3.1 Gene regulation overview

We can get a general overview of which Gene are up or down-regulated through a Sunburst plot, stacked by Stage and Layer.

make_suburst_plot

make_suburst_plot <- function(dat, layers, tooltips = NULL, colors = NULL, plot_options = list()) {
  
  if (!is.null(colors)) colors <- set_names(colors, \(x) x |> str_replace_all(" ", "\n") |> str_replace_all("and\n", "and "))
  
  .make_sunburst_data <- function(dat, layers_part) {
    
    if (all(layers == layers_part))
      sun_dat <- summarize(dat, across(any_of(c(layers_part, tooltips)), first), .by = all_of(layers_part))
    else 
      sun_dat <- summarize(dat, across(any_of(layers_part), first), .by = all_of(layers_part))
    
    return(
      bind_cols(
        transmute(sun_dat, ids = pick(any_of(layers_part)) |> pmap_chr(\(...) str_c(..., sep = ">>"))),
        transmute(
          sun_dat, 
          parents = pick(any_of(layers_part)) |> 
            select(-last_col()) |> 
            pmap_chr(\(...) str_c(..., sep = ">>")) |> 
            bind(x, if (is_empty(x)) "" else x)
        ),
        transmute(sun_dat, labels = pick(any_of(layers_part)) |> select(last_col()) |> pull(1))
      )
      |> bind(
        x, 
        if (all(layers == layers_part) & !is.null(tooltips)) 
          bind_cols(
            x, 
            transmute(
              sun_dat, 
              hovertext = pmap_chr(
                pick(all_of(tooltips)), 
                \(...) str_c(
                  names(c(...)), 
                  map_if(list(...), is.numeric, \(x) round(x, 3)) |> map_if(is.factor, as.character), 
                  sep = ": ", 
                  collapse = "\n"
                )
              )
            )
          )
        else x
      )
      |> mutate(labels = labels |> str_replace_all(" ", "\n") |> str_replace_all("and\n", "and "))
    )
  }
  
  params <- map_dfr(1:length(layers), \(x) .make_sunburst_data(dat, layers[1:x])) |> 
    mutate(bg_color = map_chr(labels, \(x) colors[[x]] %||% NA))
  
  rlang::exec(
    plotly::plot_ly,
    !!!select(params, -bg_color),
    type = "sunburst",
    branchvalues = "total",
    hoverinfo = "text",
    sort = FALSE,
    marker = list(colors = ~params$bg_color),
    !!!plot_options
  )
}

OS_data$predictions |>
  filter(Expression != regulation_type$NOT_REG) |>
  left_join(supplementary_data$gene_data$OS, join_by(Gene)) |>
  make_suburst_plot(
    layers = c("Stage", "Layer", "Expression", "Gene"),
    tooltips = c("Gene", "Pathway", "Fold", "p.value"),
    colors = sunburst_pcr_colors,
    plot_options = list(insidetextorientation = 'radial')
  )

3.2 Gene regulation timeline

To get a better idea of how each Gene’s regulation changes through time, we can plot a timeline of their expression, split by Layer and Pathway, with a color coding of the Effect of this regulation.

make_fold_timeline_plot

make_fold_timeline_plot <- function(
    dat, facet_rows = "Pathway", trans = "identity", 
    color_by = NULL, colors = colors_effect, size_boost = 1
) {
  
  origin <- do.call(trans, list(1))
  
  dat <- (
    dat
    |> mutate(Fold_trans = do.call(trans, list(Fold)))
    |> mutate(Fold_Amp = ifelse(
      max(Fold_trans, na.rm = TRUE) - min(Fold_trans, na.rm = TRUE) != 0, 
      max(Fold_trans, na.rm = TRUE) - min(Fold_trans, na.rm = TRUE), 
      mean(Fold_trans, na.rm = TRUE)) * 0.1,
      .by = all_of(c(facet_rows, "Stage"))
    )
  )
  
  timeline <- (
    ggplot(dat)
    + { if(is.null(color_by)) aes(x = Gene, color = Fold >= 1) else aes(x = Gene, color = .data[[color_by]]) }
    + geom_linerange(aes(ymax = Fold_trans), ymin = origin, size = 2 + (size_boost * 0.5))
    + geom_hline(yintercept = origin, size = 0.3, linetype = "dotted")
    + geom_text(aes(
        label = str_c(round(Fold, 2), stars.pval(p.value) |> str_replace(fixed("."), ""), sep = " "), 
        y = ifelse(Fold_trans > origin, Fold_trans + Fold_Amp, Fold_trans - Fold_Amp),
        hjust = ifelse(Fold > 1, 0, 1)
      ),
      vjust = 0.5, angle = 0, size = 2 + (size_boost * 0.25), check_overlap = TRUE
    )
    + scale_color_manual(" ", values = colors)
    + scale_y_continuous(breaks = c(0,1,2,3), expand = expansion(mult = 1.01 * (1 + (size_boost/100))))
    + scale_x_discrete(expand = expansion(add = 1 * size_boost), limits = \(x) rev(x))
    + labs(
      x = "",
      y = ifelse(trans != "identity", str_glue("Fold Change *({trans} scale)*"), "Fold Change")
    )
    + coord_flip()
    + facet_grid(
      vars(.data[[facet_rows]]), vars(Stage), 
      scales = "free_y", space = "free_y", labeller = label_wrap_gen(width = 12, multi_line = TRUE)
    )
    + { if (!is.null(color_by)) guides(color = guide_legend(title = color_by)) }
    + theme(
      legend.position = ifelse(is.null(color_by), "none", "bottom")
      , axis.text.x = element_blank()
      , axis.title.x = element_markdown(size = 9)
      , axis.text.y = element_text(size = 7)
      , strip.text = element_text(size = 5 * size_boost)
      , plot.title = element_markdown(size = 9, face = "plain", vjust = 1, hjust = 0.5)
    )
  )
  
  return(timeline)
}

render_OS_timeline

render_OS_timeline <- function(dat, group, size_boost = 1.2) {
  
  if (group[[1]][1] == "Whole") {
    
    dat |>
      group_by(Figure) |> 
      group_map(\(d, g) render_OS_timeline(d, str_glue("Whole Cerebellum - {first(g[[1]])}"), 1.5))
  }
  else {
    
    group_name <- group[[1]][1]
    if (group_name == "PC") group_name <- "Purkinje Cells"
    
    if (group_name == "Cellular Response") {
      cell_resp_levels <- c("Cell Death and Protection", "Apoptotic Pathways", "Inflammation Pathways", "Autophagy and Mitophagy")
      dat <- mutate(dat, Pathway = factor(Pathway, levels = cell_resp_levels))
    }
    
    plot <- make_fold_timeline_plot(dat, facet_rows = "Pathway", trans = "log", color_by = "Effect", size_boost = size_boost)
  
    width <- 2 + n_distinct(dat$Stage)
    height <- dat |> 
      group_by(Pathway) |> 
      group_map(\(d, g) n_distinct(d$Gene) * 0.1 + 1) |> 
      flatten_dbl() |> 
      sum()
    
    template_md <- c(
      '### `r group_name`',
      '```{r}',
      '#| echo: false',
      '#| fig-width: !expr width',
      '#| fig-height: !expr height',
      'plot',
      '```'
    )
    
    knitr::knit_child(text = template_md, envir = rlang::env(), quiet = TRUE)
  }
}

OS_timelines <- (
  OS_data$predictions 
  |> left_join(supplementary_data$gene_data$OS, join_by(Gene)) 
  |> filter(p.value <= .05) 
  |> mutate(Effect = case_when(
      str_detect(Expression, "Downregulated") & Effect == "Beneficial" ~ "Deleterious",
      str_detect(Expression, "Downregulated") & Effect == "Deleterious" ~ "Beneficial",
      .default = Effect
    )
  )
  |> select(Stage, Layer, Gene, Fold, p.value, Expression, Effect, Pathway, Figure)
  |> group_by(Layer)
  |> group_map(render_OS_timeline)
  |> unlist()
)