TCDD.Rmd
The dataset we will be working with concerns a single-cell RNA-sequencing dataset consisting of two different experiments, which correspond to treatment (TCDD) and control. Nault et al. (Nault et al. 2021) studied the effect of that drug on the liver, along the central-portal axis.
cds <- condimentsPaper::import_TCDD()
# Get zonal genes ----
url <- paste0("https://static-content.springer.com/esm/art%3A10.1038%2Fnature",
"21065/MediaObjects/41586_2017_BFnature21065_MOESM62_ESM.xlsx")
df <- openxlsx::read.xlsx(url, startRow = 3, na.strings = "NaN")
df <- df[, c(1, 21)] %>%
dplyr::rename("qvalues" = `q-values`) %>%
dplyr::filter(!is.na(qvalues), qvalues < .005) %>%
dplyr::select(Gene.Symbol)
zone.mat <- df$Gene.Symbol %>% stringr::str_split(";") %>% unlist()
# Filter to only keep hepatocytes
hepatocytes <- subset(cds, subset = celltype.ontology == "CL:0000182:Hepatocyte")
common.genes <- intersect(zone.mat, rownames(hepatocytes))
DefaultAssay(hepatocytes) <- "integrated"
hepatocytes <- ScaleData(hepatocytes, verbose = FALSE)
hepatocytes <- RunPCA(hepatocytes, features = common.genes, verbose = FALSE)
hepatocytes <- RunUMAP(hepatocytes, dims = 1:30, verbose = FALSE)
tcdd <- as.SingleCellExperiment(hepatocytes, assay = "RNA")
tcdd$celltype <- droplevels(tcdd$celltype) %>%
stringr::str_remove("Hepatocytes - ") %>%
unlist()
rowData(tcdd)$is_zonal <- rownames(tcdd) %in% zone.mat
data("tcdd", package = "condimentsPaper")
The data is imported using a function from the package. We then compute reduced dimension coordinates with UMAP McInnes, Healy, and Melville (2018).
df <- bind_cols(
as.data.frame(reducedDims(tcdd)$UMAP),
as.data.frame(colData(tcdd)[, 1:60]))
p1 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = treatment)) +
geom_point(size = .7) +
scale_color_brewer(palette = "Accent") +
labs(col = "Treatment")
p2 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, fill = celltype)) +
geom_point(size = 1, alpha = .65, col = "grey70", shape = 21) +
scale_fill_manual(values = pal) +
labs(fill = "Cell Type")
p1
p2
tcdd <- imbalance_score(tcdd, dimred = "UMAP", conditions = "treatment", smooth = 5)
df$scores <- tcdd$scores$scaled_scores
p3 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = scores)) +
geom_point(size = .7) +
scale_color_viridis_c(option = "C") +
labs(col = "Score")
p3
To estimate the trajectory, we use slingshot (Street et al. 2018).
tcdd <- slingshot(tcdd, reducedDim = "UMAP", clusterLabels = tcdd$celltype,
start.clus = "Central", end.clus = "Portal")
set.seed(9764)
topologyTest(tcdd, tcdd$treatment)
## Generating permuted trajectories
## Running KS-mean test
## method thresh statistic p.value
## 1 KS_mean 0.01 0.01823837 0.07302285
rownames(df) <- colnames(tcdd)
df$cells <- rownames(df)
pst <- data.frame(cells = colnames(tcdd),
pst = slingPseudotime(tcdd)[, 1])
df <- dplyr::full_join(df, pst)
## Joining, by = "cells"
p4 <- ggplot(df, aes(x = UMAP_1, y = UMAP_2, col = pst)) +
geom_point(size = .7) +
scale_color_viridis_c() +
labs(col = "Pseudotime") +
geom_path(data = slingCurves(tcdd, as.df = TRUE) %>% arrange(Order),
col = "black", size = 1.5)
p4
p5 <- ggplot(df, aes(x = pst)) +
geom_density(alpha = .8, aes(fill = treatment), col = "transparent") +
geom_density(aes(col = treatment), fill = "transparent", size = 1.5) +
guides(col = "none",
fill = guide_legend(override.aes =
list(size = 1.5, col = c("#7FC97F", "#BEAED4"),
title.position = "top"))) +
scale_fill_brewer(palette = "Accent") +
scale_color_brewer(palette = "Accent") +
labs(x = "Pseudotime", fill = "Treatment")
p5
progressionTest(tcdd, conditions = tcdd$treatment)
## Registered S3 method overwritten by 'cli':
## method from
## print.boxx spatstat.geom
## # A tibble: 1 × 3
## lineage statistic p.value
## <chr> <dbl> <dbl>
## 1 1 0.266 2.2e-16
We use tradeSeq (Van den Berge et al. 2020).
set.seed(3)
library(BiocParallel)
BPPARAM <- bpparam()
BPPARAM$workers <- 3
BPPARAM$progressbar <- TRUE
icMat <- evaluateK(counts = as.matrix(assays(tcdd)$counts),
pseudotime = slingPseudotime(sds),
cellWeights = rep(1, ncol(tcdd)),
conditions = factor(tcdd$treatment),
nGenes = 300,
k = 3:7, parallel = TRUE,
BPPARAM = BPPARAM)
set.seed(3)
tcdd <- fitGAM(counts = tcdd,
conditions = factor(tcdd$treatment),
nknots = 7,
verbose = TRUE,
parallel = TRUE,
BPPARAM = BPPARAM)
condRes <- conditionTest(tcdd, l2fc = log2(2))
condRes$padj <- p.adjust(condRes$pvalue, "fdr")
mean(condRes$padj <= 0.05, na.rm = TRUE)
## [1] 0.2642991
sum(condRes$padj <= 0.05, na.rm = TRUE)
## [1] 2121
conditionGenes <- rownames(condRes)[condRes$padj <= 0.05]
conditionGenes <- conditionGenes[!is.na(conditionGenes)]
scales <- brewer.pal(3, "Accent")[1:2]
oo <- order(condRes$waldStat, decreasing = TRUE)
# most significant gene
p1 <- plotSmoothers(tcdd, assays(tcdd)$counts,
gene = rownames(assays(tcdd)$counts)[oo[1]],
alpha = 1, border = TRUE, curvesCols = scales) +
scale_color_manual(values = scales) +
ggtitle(rownames(assays(tcdd)$counts)[oo[1]])
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# second most significant gene
p2 <- plotSmoothers(tcdd, assays(tcdd)$counts,
gene = rownames(assays(tcdd)$counts)[oo[2]],
alpha = 1, border = TRUE, curvesCols = scales) +
scale_color_manual(values = scales) +
ggtitle(rownames(assays(tcdd)$counts)[oo[2]])
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# least significant gene
p3 <- plotSmoothers(tcdd, assays(tcdd)$counts,
gene = rownames(assays(tcdd)$counts)[oo[nrow(tcdd)]],
alpha = 1, border = TRUE, curvesCols = scales) +
scale_color_manual(values = scales) +
ggtitle(rownames(assays(tcdd)$counts)[oo[nrow(tcdd)]])
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p1
p2
p3
### based on mean smoother
yhatSmooth <- predictSmooth(tcdd, gene = conditionGenes, nPoints = 50, tidy = FALSE) %>%
log1p()
yhatSmoothScaled <- t(apply(yhatSmooth,1, scales::rescale))
heatSmooth_TCDD <- pheatmap(yhatSmoothScaled[, 51:100],
cluster_cols = FALSE,
show_rownames = FALSE, show_colnames = FALSE, main = "TCDD", legend = FALSE,
silent = TRUE
)
matchingHeatmap_control <- pheatmap(yhatSmoothScaled[heatSmooth_TCDD$tree_row$order, 1:50],
cluster_cols = FALSE, cluster_rows = FALSE,
show_rownames = FALSE, show_colnames = FALSE, main = "Control",
legend = FALSE, silent = TRUE
)
p4 <- plot_grid(heatSmooth_TCDD[[4]], matchingHeatmap_control[[4]], ncol = 2)
p4
kept <- rownames(tcdd)[rowData(tcdd)$is_zonal]
mat <- matrix(c(sum(kept %in% conditionGenes), length(conditionGenes),
sum(kept %in% rownames(tcdd)[!rownames(tcdd) %in% conditionGenes]),
nrow(tcdd) - length(conditionGenes)),
byrow = FALSE, ncol = 2)
fisher.test(mat)
##
## Fisher's Exact Test for Count Data
##
## data: mat
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.764609 2.412226
## sample estimates:
## odds ratio
## 2.063913
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] scales_1.1.1 pheatmap_1.0.12
## [3] RColorBrewer_1.1-2 tradeSeq_1.7.04
## [5] cowplot_1.1.1 ggplot2_3.3.5
## [7] condiments_1.1.04 slingshot_2.1.1
## [9] TrajectoryUtils_1.0.0 princurve_2.1.6
## [11] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [13] Biobase_2.52.0 GenomicRanges_1.44.0
## [15] GenomeInfoDb_1.28.2 IRanges_2.26.0
## [17] S4Vectors_0.30.0 BiocGenerics_0.38.0
## [19] MatrixGenerics_1.4.3 matrixStats_0.60.1
## [21] SeuratObject_4.0.2 Seurat_4.0.3
## [23] stringr_1.4.0 dplyr_1.0.7
## [25] knitr_1.33
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.20
## [3] tidyselect_1.1.1 htmlwidgets_1.5.3
## [5] grid_4.1.0 BiocParallel_1.26.2
## [7] Rtsne_0.15 pROC_1.17.0.1
## [9] munsell_0.5.0 codetools_0.2-18
## [11] ragg_1.1.3 ica_1.0-2
## [13] future_1.21.0 miniUI_0.1.1.1
## [15] withr_2.4.2 colorspace_2.0-2
## [17] highr_0.9 rstudioapi_0.13
## [19] ROCR_1.0-11 tensor_1.5
## [21] listenv_0.8.0 labeling_0.4.2
## [23] GenomeInfoDbData_1.2.6 polyclip_1.10-0
## [25] farver_2.1.0 rprojroot_2.0.2
## [27] parallelly_1.27.0 vctrs_0.3.8
## [29] generics_0.1.0 ipred_0.9-11
## [31] xfun_0.24 Ecume_0.9.1
## [33] R6_2.5.1 locfit_1.5-9.4
## [35] bitops_1.0-7 spatstat.utils_2.2-0
## [37] cachem_1.0.5 DelayedArray_0.18.0
## [39] assertthat_0.2.1 promises_1.2.0.1
## [41] nnet_7.3-16 gtable_0.3.0
## [43] globals_0.14.0 goftest_1.2-2
## [45] timeDate_3043.102 rlang_0.4.11
## [47] systemfonts_1.0.2 splines_4.1.0
## [49] lazyeval_0.2.2 ModelMetrics_1.2.2.2
## [51] spatstat.geom_2.2-2 yaml_2.2.1
## [53] reshape2_1.4.4 abind_1.4-5
## [55] httpuv_1.6.1 caret_6.0-88
## [57] tools_4.1.0 lava_1.6.9
## [59] ellipsis_0.3.2 spatstat.core_2.3-0
## [61] jquerylib_0.1.4 proxy_0.4-26
## [63] ggridges_0.5.3 Rcpp_1.0.7
## [65] plyr_1.8.6 sparseMatrixStats_1.4.0
## [67] zlibbioc_1.38.0 purrr_0.3.4
## [69] RCurl_1.98-1.4 rpart_4.1-15
## [71] deldir_0.2-10 pbapply_1.4-3
## [73] viridis_0.6.1 zoo_1.8-9
## [75] ggrepel_0.9.1 cluster_2.1.2
## [77] fs_1.5.0 magrittr_2.0.1
## [79] data.table_1.14.0 scattermore_0.7
## [81] lmtest_0.9-38 RANN_2.6.1
## [83] fitdistrplus_1.1-5 patchwork_1.1.1
## [85] mime_0.11 evaluate_0.14
## [87] xtable_1.8-4 gridExtra_2.3
## [89] compiler_4.1.0 transport_0.12-2
## [91] tibble_3.1.4 KernSmooth_2.23-20
## [93] crayon_1.4.1 htmltools_0.5.1.1
## [95] mgcv_1.8-36 later_1.2.0
## [97] tidyr_1.1.3 lubridate_1.7.10
## [99] DBI_1.1.1 MASS_7.3-54
## [101] Matrix_1.3-4 cli_3.0.1
## [103] gower_0.2.2 igraph_1.2.6
## [105] pkgconfig_2.0.3 pkgdown_1.6.1
## [107] plotly_4.9.4.1 spatstat.sparse_2.0-0
## [109] recipes_0.1.16 foreach_1.5.1
## [111] bslib_0.2.5.1 XVector_0.32.0
## [113] prodlim_2019.11.13 digest_0.6.27
## [115] sctransform_0.3.2 RcppAnnoy_0.0.18
## [117] spatstat.data_2.1-0 rmarkdown_2.9
## [119] leiden_0.3.8 uwot_0.1.10
## [121] edgeR_3.34.0 DelayedMatrixStats_1.14.0
## [123] kernlab_0.9-29 shiny_1.6.0
## [125] lifecycle_1.0.0 nlme_3.1-152
## [127] jsonlite_1.7.2 desc_1.3.0
## [129] viridisLite_0.4.0 limma_3.48.3
## [131] fansi_0.5.0 pillar_1.6.2
## [133] spatstat.linnet_2.3-0 lattice_0.20-44
## [135] fastmap_1.1.0 httr_1.4.2
## [137] survival_3.2-11 glue_1.4.2
## [139] spatstat_2.2-0 png_0.1-7
## [141] iterators_1.0.13 class_7.3-19
## [143] stringi_1.7.3 sass_0.4.0
## [145] textshaping_0.3.5 memoise_2.0.0
## [147] irlba_2.3.3 e1071_1.7-7
## [149] future.apply_1.7.0