The dataset we will be working with concerns a single-cell RNA-sequencing dataset consisting of two different experiments, which correspond to treatment (TCDD) and control. Nault et al. (Nault et al. 2021) studied the effect of that drug on the liver, along the central-portal axis.

Load data

cds <- condimentsPaper::import_TCDD()
# Get zonal genes ----
url <- paste0("https://static-content.springer.com/esm/art%3A10.1038%2Fnature",
              "21065/MediaObjects/41586_2017_BFnature21065_MOESM62_ESM.xlsx")
df <- openxlsx::read.xlsx(url, startRow = 3, na.strings = "NaN")
df <- df[, c(1, 21)] %>%
  dplyr::rename("qvalues" = `q-values`) %>%
  dplyr::filter(!is.na(qvalues), qvalues < .005) %>%
  dplyr::select(Gene.Symbol)
zone.mat <- df$Gene.Symbol %>% stringr::str_split(";") %>% unlist()
# Filter to only keep hepatocytes
hepatocytes <- subset(cds, subset = celltype.ontology == "CL:0000182:Hepatocyte")
common.genes <- intersect(zone.mat, rownames(hepatocytes))
DefaultAssay(hepatocytes) <- "integrated"
hepatocytes <- ScaleData(hepatocytes, verbose = FALSE)
hepatocytes <- RunPCA(hepatocytes, features = common.genes, verbose = FALSE)
hepatocytes <- RunUMAP(hepatocytes, dims = 1:30, verbose = FALSE)
tcdd <- as.SingleCellExperiment(hepatocytes, assay = "RNA")
tcdd$celltype <- droplevels(tcdd$celltype) %>%
  stringr::str_remove("Hepatocytes - ") %>%
  unlist()
rowData(tcdd)$is_zonal <- rownames(tcdd) %in% zone.mat

data("tcdd", package = "condimentsPaper")

EDA

The data is imported using a function from the package. We then compute reduced dimension coordinates with UMAP McInnes, Healy, and Melville (2018).

df <- bind_cols(
  as.data.frame(reducedDims(tcdd)$UMAP),
  as.data.frame(colData(tcdd)[, 1:60]))
p1 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = treatment)) +
  geom_point(size = .7) +
  scale_color_brewer(palette = "Accent") +
  labs(col = "Treatment")
p2 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, fill = celltype)) +
  geom_point(size = 1, alpha = .65, col = "grey70", shape = 21) +
  scale_fill_manual(values = pal) +
  labs(fill = "Cell Type")
p1

p2

tcdd <- imbalance_score(tcdd, dimred = "UMAP", conditions = "treatment", smooth = 5)
df$scores <- tcdd$scores$scaled_scores
p3 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = scores)) +
  geom_point(size = .7) +
  scale_color_viridis_c(option = "C") +
  labs(col = "Score")
p3

Differential Topology

To estimate the trajectory, we use slingshot (Street et al. 2018).

tcdd <- slingshot(tcdd, reducedDim = "UMAP", clusterLabels = tcdd$celltype,
                  start.clus = "Central", end.clus  = "Portal")

set.seed(9764)
topologyTest(tcdd, tcdd$treatment)

## Generating permuted trajectories

## Running KS-mean test

##    method thresh  statistic    p.value
## 1 KS_mean   0.01 0.01823837 0.07302285

rownames(df) <- colnames(tcdd)
df$cells <- rownames(df)
pst <- data.frame(cells = colnames(tcdd),
                  pst = slingPseudotime(tcdd)[, 1])
df <- dplyr::full_join(df, pst)

## Joining, by = "cells"

p4 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pst)) +
  geom_point(size = .7) +
  scale_color_viridis_c() +
  labs(col = "Pseudotime") +
  geom_path(data = slingCurves(tcdd, as.df = TRUE) %>% arrange(Order),
            col = "black", size = 1.5)
p4

Differential Progression

p5 <- ggplot(df, aes(x = pst)) +
  geom_density(alpha = .8, aes(fill = treatment), col = "transparent") +
  geom_density(aes(col = treatment), fill = "transparent", size = 1.5) +
  guides(col = "none", 
         fill = guide_legend(override.aes = 
                               list(size = 1.5, col = c("#7FC97F", "#BEAED4"),
                                    title.position = "top"))) +
  scale_fill_brewer(palette = "Accent") +
  scale_color_brewer(palette = "Accent") +
  labs(x = "Pseudotime", fill = "Treatment")
p5

progressionTest(tcdd, conditions = tcdd$treatment)

## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom

## # A tibble: 1 × 3
##   lineage statistic p.value
##   <chr>       <dbl>   <dbl>
## 1 1           0.266 2.2e-16

Differential Expression

We use tradeSeq (Van den Berge et al. 2020).

filter <- apply(counts(tcdd), 1, function(g) {
    sum(g >= 3) >= 10
})
tcdd <- tcdd[filter, ]

set.seed(3)
library(BiocParallel)
BPPARAM <- bpparam()
BPPARAM$workers <- 3
BPPARAM$progressbar <- TRUE
icMat <- evaluateK(counts = as.matrix(assays(tcdd)$counts),
                   pseudotime = slingPseudotime(sds),
                   cellWeights = rep(1, ncol(tcdd)),
                   conditions = factor(tcdd$treatment),
                   nGenes = 300,
                   k = 3:7, parallel = TRUE,
                   BPPARAM = BPPARAM)
set.seed(3)
tcdd <- fitGAM(counts = tcdd,
              conditions = factor(tcdd$treatment),
              nknots = 7,
              verbose = TRUE, 
              parallel = TRUE,
              BPPARAM = BPPARAM)

Assessing Differential Expression Accross conditions

condRes <- conditionTest(tcdd, l2fc = log2(2))
condRes$padj <- p.adjust(condRes$pvalue, "fdr")
mean(condRes$padj <= 0.05, na.rm = TRUE)

## [1] 0.2642991

sum(condRes$padj <= 0.05, na.rm = TRUE)

## [1] 2121

conditionGenes <- rownames(condRes)[condRes$padj <= 0.05]
conditionGenes <- conditionGenes[!is.na(conditionGenes)]
scales <- brewer.pal(3, "Accent")[1:2]
oo <- order(condRes$waldStat, decreasing = TRUE)

# most significant gene
p1 <- plotSmoothers(tcdd, assays(tcdd)$counts,
                    gene = rownames(assays(tcdd)$counts)[oo[1]],
                    alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tcdd)$counts)[oo[1]])

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

# second most significant gene
p2 <- plotSmoothers(tcdd, assays(tcdd)$counts,
                    gene = rownames(assays(tcdd)$counts)[oo[2]],
                    alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tcdd)$counts)[oo[2]])

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

# least significant gene
p3 <- plotSmoothers(tcdd, assays(tcdd)$counts,
                    gene = rownames(assays(tcdd)$counts)[oo[nrow(tcdd)]],
                    alpha = 1, border = TRUE, curvesCols = scales) +
  scale_color_manual(values = scales) +
  ggtitle(rownames(assays(tcdd)$counts)[oo[nrow(tcdd)]])

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

p1

p2

p3

### based on mean smoother
yhatSmooth <- predictSmooth(tcdd, gene = conditionGenes, nPoints = 50, tidy = FALSE) %>%
  log1p()
yhatSmoothScaled <- t(apply(yhatSmooth,1, scales::rescale))
heatSmooth_TCDD <- pheatmap(yhatSmoothScaled[, 51:100],
  cluster_cols = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "TCDD", legend = FALSE,
  silent = TRUE
)

matchingHeatmap_control <- pheatmap(yhatSmoothScaled[heatSmooth_TCDD$tree_row$order, 1:50],
  cluster_cols = FALSE, cluster_rows = FALSE,
  show_rownames = FALSE, show_colnames = FALSE, main = "Control",
  legend = FALSE, silent = TRUE 
)

p4 <- plot_grid(heatSmooth_TCDD[[4]], matchingHeatmap_control[[4]], ncol = 2)
p4

kept <- rownames(tcdd)[rowData(tcdd)$is_zonal]
mat <- matrix(c(sum(kept %in% conditionGenes), length(conditionGenes),
                sum(kept %in% rownames(tcdd)[!rownames(tcdd) %in% conditionGenes]),
                nrow(tcdd) - length(conditionGenes)),
              byrow = FALSE, ncol = 2)
fisher.test(mat)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  mat
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.764609 2.412226
## sample estimates:
## odds ratio 
##   2.063913

Session info

sessionInfo()

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scales_1.1.1                pheatmap_1.0.12            
##  [3] RColorBrewer_1.1-2          tradeSeq_1.7.04            
##  [5] cowplot_1.1.1               ggplot2_3.3.5              
##  [7] condiments_1.1.04           slingshot_2.1.1            
##  [9] TrajectoryUtils_1.0.0       princurve_2.1.6            
## [11] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [13] Biobase_2.52.0              GenomicRanges_1.44.0       
## [15] GenomeInfoDb_1.28.2         IRanges_2.26.0             
## [17] S4Vectors_0.30.0            BiocGenerics_0.38.0        
## [19] MatrixGenerics_1.4.3        matrixStats_0.60.1         
## [21] SeuratObject_4.0.2          Seurat_4.0.3               
## [23] stringr_1.4.0               dplyr_1.0.7                
## [25] knitr_1.33                 
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                reticulate_1.20          
##   [3] tidyselect_1.1.1          htmlwidgets_1.5.3        
##   [5] grid_4.1.0                BiocParallel_1.26.2      
##   [7] Rtsne_0.15                pROC_1.17.0.1            
##   [9] munsell_0.5.0             codetools_0.2-18         
##  [11] ragg_1.1.3                ica_1.0-2                
##  [13] future_1.21.0             miniUI_0.1.1.1           
##  [15] withr_2.4.2               colorspace_2.0-2         
##  [17] highr_0.9                 rstudioapi_0.13          
##  [19] ROCR_1.0-11               tensor_1.5               
##  [21] listenv_0.8.0             labeling_0.4.2           
##  [23] GenomeInfoDbData_1.2.6    polyclip_1.10-0          
##  [25] farver_2.1.0              rprojroot_2.0.2          
##  [27] parallelly_1.27.0         vctrs_0.3.8              
##  [29] generics_0.1.0            ipred_0.9-11             
##  [31] xfun_0.24                 Ecume_0.9.1              
##  [33] R6_2.5.1                  locfit_1.5-9.4           
##  [35] bitops_1.0-7              spatstat.utils_2.2-0     
##  [37] cachem_1.0.5              DelayedArray_0.18.0      
##  [39] assertthat_0.2.1          promises_1.2.0.1         
##  [41] nnet_7.3-16               gtable_0.3.0             
##  [43] globals_0.14.0            goftest_1.2-2            
##  [45] timeDate_3043.102         rlang_0.4.11             
##  [47] systemfonts_1.0.2         splines_4.1.0            
##  [49] lazyeval_0.2.2            ModelMetrics_1.2.2.2     
##  [51] spatstat.geom_2.2-2       yaml_2.2.1               
##  [53] reshape2_1.4.4            abind_1.4-5              
##  [55] httpuv_1.6.1              caret_6.0-88             
##  [57] tools_4.1.0               lava_1.6.9               
##  [59] ellipsis_0.3.2            spatstat.core_2.3-0      
##  [61] jquerylib_0.1.4           proxy_0.4-26             
##  [63] ggridges_0.5.3            Rcpp_1.0.7               
##  [65] plyr_1.8.6                sparseMatrixStats_1.4.0  
##  [67] zlibbioc_1.38.0           purrr_0.3.4              
##  [69] RCurl_1.98-1.4            rpart_4.1-15             
##  [71] deldir_0.2-10             pbapply_1.4-3            
##  [73] viridis_0.6.1             zoo_1.8-9                
##  [75] ggrepel_0.9.1             cluster_2.1.2            
##  [77] fs_1.5.0                  magrittr_2.0.1           
##  [79] data.table_1.14.0         scattermore_0.7          
##  [81] lmtest_0.9-38             RANN_2.6.1               
##  [83] fitdistrplus_1.1-5        patchwork_1.1.1          
##  [85] mime_0.11                 evaluate_0.14            
##  [87] xtable_1.8-4              gridExtra_2.3            
##  [89] compiler_4.1.0            transport_0.12-2         
##  [91] tibble_3.1.4              KernSmooth_2.23-20       
##  [93] crayon_1.4.1              htmltools_0.5.1.1        
##  [95] mgcv_1.8-36               later_1.2.0              
##  [97] tidyr_1.1.3               lubridate_1.7.10         
##  [99] DBI_1.1.1                 MASS_7.3-54              
## [101] Matrix_1.3-4              cli_3.0.1                
## [103] gower_0.2.2               igraph_1.2.6             
## [105] pkgconfig_2.0.3           pkgdown_1.6.1            
## [107] plotly_4.9.4.1            spatstat.sparse_2.0-0    
## [109] recipes_0.1.16            foreach_1.5.1            
## [111] bslib_0.2.5.1             XVector_0.32.0           
## [113] prodlim_2019.11.13        digest_0.6.27            
## [115] sctransform_0.3.2         RcppAnnoy_0.0.18         
## [117] spatstat.data_2.1-0       rmarkdown_2.9            
## [119] leiden_0.3.8              uwot_0.1.10              
## [121] edgeR_3.34.0              DelayedMatrixStats_1.14.0
## [123] kernlab_0.9-29            shiny_1.6.0              
## [125] lifecycle_1.0.0           nlme_3.1-152             
## [127] jsonlite_1.7.2            desc_1.3.0               
## [129] viridisLite_0.4.0         limma_3.48.3             
## [131] fansi_0.5.0               pillar_1.6.2             
## [133] spatstat.linnet_2.3-0     lattice_0.20-44          
## [135] fastmap_1.1.0             httr_1.4.2               
## [137] survival_3.2-11           glue_1.4.2               
## [139] spatstat_2.2-0            png_0.1-7                
## [141] iterators_1.0.13          class_7.3-19             
## [143] stringi_1.7.3             sass_0.4.0               
## [145] textshaping_0.3.5         memoise_2.0.0            
## [147] irlba_2.3.3               e1071_1.7-7              
## [149] future.apply_1.7.0

References

Becht, Etienne, Leland McInnes, John Healy, Charles-Antoine Dutertre, Immanuel W H Kwok, Lai Guan Ng, Florent Ginhoux, and Evan W Newell. 2019. “Dimensionality reduction for visualizing single-cell data using UMAP.” Nature Biotechnology 37 (1): 38–44. https://doi.org/10.1038/nbt.4314.

McInnes, Leland, John Healy, and James Melville. 2018. “UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction.” Arxiv, February. http://arxiv.org/abs/1802.03426.

Nault, Rance, Kelly A. Fader, Sudin Bhattacharya, and Tim R. Zacharewski. 2021. “Single-Nuclei RNA Sequencing Assessment of the Hepatic Effects of 2,3,7,8-Tetrachlorodibenzo-p-dioxin.” CMGH 11 (1): 147–59. https://doi.org/10.1016/j.jcmgh.2020.07.012.

Street, Kelly, Davide Risso, Russell B. Fletcher, Diya Das, John Ngai, Nir Yosef, Elizabeth Purdom, and Sandrine Dudoit. 2018. “Slingshot: cell lineage and pseudotime inference for single-cell transcriptomics.” BMC Genomics 19 (1): 477. https://doi.org/10.1186/s12864-018-4772-0.

Van den Berge, Koen, Hector Roux de Bézieux, Kelly Street, Wouter Saelens, Robrecht Cannoodt, Yvan Saeys, Sandrine Dudoit, and Lieven Clement. 2020. “Trajectory-based differential expression analysis for single-cell sequencing data.” Nature Communications 11 (1): 1201. https://doi.org/10.1038/s41467-020-14766-3.

Analysis of the TCDD datataset

Hector Roux de Bézieux