---
title: "TNBC"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{TNBC}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
# knitr::opts_chunk$set(eval = !is_check)
```
```{r setup}
library(funkycells)
```
```{r define_functions, echo=FALSE}
full_run <- FALSE # This runs only computations that are feasibly quick
paperRun <- FALSE * full_run # This creates and saves figures as in paper
save_path <- tempdir() # Location to save files
specify_decimal <- function(x, k) {
trimws(format(round(x, k), nsmall = k))
}
```
This vignette documents a sample analysis of Triple Negative Breast Cancer (TNBC) data, retrieved from \url{https://www.angelolab.com/mibi-data}. We consider the classified phenotypes rather than the direct protein data, pre-loaded in this package.
```{r getData}
TNBC_pheno <- TNBC_pheno
TNBC_meta <- TNBC_meta
```
We begin by defining the phenotypes of interest as well as the related interactions.
```{r specifyPheno}
phenos <- unique(TNBC_pheno$Phenotype)
pheno_interactions <- rbind(
data.frame(t(combn(phenos, 2))),
data.frame("X1" = phenos, "X2" = phenos)
)
phenos_subset <- c(
"Tumor", "CD3T", "CD4T", "CD8T", "B", "DC",
"DCMono", "Macrophage", "MonoNeu", "NK", "Treg"
)
pheno_interactions_subset <- data.frame(
Var1 = rep("Tumor", 11),
Var2 = c(
"Tumor", "CD3T", "CD4T", "CD8T", "B", "DC",
"DCMono", "Macrophage", "MonoNeu", "NK", "Treg"
)
)
```
We notice there are `r length(phenos)` total phenotypes with
`r nrow(pheno_interactions)` total possible interactions. Alternatively, we can
also focus on a subset of phenotypes (`r length(phenos_subset)`) and
interactions (`r nrow(pheno_interactions_subset)`) of particular interest. Another analysis on the subset interaction set has been considered but was suppressed for brevity.
In order to build confidence in our approach, we begin by simulating data with a similar structure to the full phenotype data and evaluating the effectiveness of the approach.
To start, let us determine the agent information in a typical image.
```{r TNBCInfo}
data_pheno_stat <- data.frame("pheno" = phenos)
for (person in unique(TNBC_pheno$Person)) {
tmp <- as.data.frame(table(TNBC_pheno[TNBC_pheno$Person == person, "Phenotype"]))
colnames(tmp) <- c("pheno", person)
data_pheno_stat <- merge(x = data_pheno_stat, y = tmp, all.x = TRUE)
}
agent_info_subset <- data.frame(data_pheno_stat[1],
"mean" = rowMeans(data_pheno_stat[-1], na.rm = TRUE),
"med" = apply(data_pheno_stat[-1], 1, median, na.rm = TRUE)
)
```
```{r paper_K_example, echo=FALSE, eval=paperRun}
tnbc_fig <- plotPP(TNBC_pheno[TNBC_pheno$Person == 2, c("cellx", "celly", "Phenotype")],
ptSize = 3, colorGuide = "none", dropAxes = T
)
png(paste0(save_path,"/paper/tnbc_person2_phenotypes.png"),
width = 800, height = 800)
print(tnbc_fig)
dev.off()
kf <- getKFunction(
data = TNBC_pheno[TNBC_pheno$Person == 2, -1],
agents = c("Tumor", "MonoNeu"),
unit = "Person",
rCheckVals = seq(0, 50, 1)
)
k_fig <- ggplot2::ggplot() +
ggplot2::geom_line(ggplot2::aes(x = r, y = K1), data = kf, linewidth = 2) +
ggplot2::geom_line(ggplot2::aes(x = seq(0, 50, 1), y = pi * seq(0, 50, 1)^2),
linewidth = 2, col = "red", linetype = "dashed"
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.title = ggplot2::element_blank(),
axis.text = ggplot2::element_blank()
)
png(paste0(save_path,"/paper/tnbc_person2_tumor_mononeu_k.png"),
width = 800, height = 800)
print(k_fig)
dev.off()
```
Now let us generate two data sets: (1) where no informative interactions are
present and (2) where some informative interactions are present. We generate
$34$ patients, comparative to the `r length(unique(TNBC_pheno$Person))` TNBC
patients. We generate $15$ for each class with possible interactions, if
present, and $2$ for each class that are indistinguishable to add some noise to
the data.
The non-interactive data is simulated as follows.
```{r buildNUllData}
set.seed(123)
dat0 <- simulatePP(
agentVarData =
data.frame(
"outcome" = c(0, 1),
"c1" = c(0, 0),
"c2" = c(1 / 25, 1 / 25), "c3" = c(1 / 50, 1 / 50),
"c4" = c(0, 0),
"c5" = c(0, 0), "c6" = c(0, 0),
"c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
"c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
"c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
"c13" = c(0, 0), "c14" = c(0, 0),
"c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
),
agentKappaData = data.frame(
"agent" = paste0("c", 1:16),
"clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
"kappa" = c(
rbinom(1, 100, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 30, 0.5),
rbinom(1, 80, 0.5),
rbinom(1, 350, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 120, 0.5),
rbinom(1, 150, 0.5),
rbinom(2, 250, 0.5),
rbinom(1, 600, 0.5),
rbinom(1, 60, 0.5),
rbinom(2, 140, 0.5),
rbinom(1, 20, 0.5),
rbinom(1, 5, 0.5)
)
),
unitsPerOutcome = 15,
replicatesPerUnit = 1,
silent = FALSE
)
dat1 <- simulatePP(
agentVarData =
data.frame(
"outcome" = c(0, 1),
"c1" = c(0, 0),
"c2" = c(1 / 25, 1 / 25), "c3" = c(1 / 50, 1 / 50),
"c4" = c(0, 0),
"c5" = c(0, 0), "c6" = c(0, 0),
"c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
"c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
"c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
"c13" = c(0, 0), "c14" = c(0, 0),
"c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
),
agentKappaData = data.frame(
"agent" = paste0("c", 1:16),
"clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
"kappa" = c(
rbinom(1, 100, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 30, 0.5),
rbinom(1, 80, 0.5),
rbinom(1, 350, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 120, 0.5),
rbinom(1, 150, 0.5),
rbinom(2, 250, 0.5),
rbinom(1, 600, 0.5),
rbinom(1, 60, 0.5),
rbinom(2, 140, 0.5),
rbinom(1, 20, 0.5),
rbinom(1, 5, 0.5)
)
),
unitsPerOutcome = 2,
replicatesPerUnit = 1,
silent = F
)
dat1$unit <- ifelse(dat1$unit == "u1", "u31",
ifelse(dat1$unit == "u2", "u32",
ifelse(dat1$unit == "u3", "u33",
ifelse(dat1$unit == "u4", "u34", NA)
)
)
)
dat1$replicate <- ifelse(dat1$replicate == "1", "31",
ifelse(dat1$replicate == "2", "32",
ifelse(dat1$replicate == "3", "33",
ifelse(dat1$replicate == "4", "34", NA)
)
)
)
dat <- rbind(dat0, dat1)
```
Compare images of the data.
```{r tnbc_image, fig.width=5, fig.height=5, fig.cap="TNBC Patient Image"}
plotPP(
data = TNBC_pheno[TNBC_pheno$Person == 2, c("cellx", "celly", "Phenotype")],
ptSize = 1, dropAxes = T, colorGuide = "none"
)
```
```{r sim_image, fig.width=5, fig.height=5, fig.cap="Simulated Patient Image"}
plotPP(dat[dat$replicate == 18, c("x", "y", "type")],
ptSize = 1, dropAxes = T, colorGuide = "none"
)
```
```{r paper_simtnbc_comparision, echo=FALSE, eval=paperRun}
tmp <- length(table(dat[dat$replicate == 1, c("type")]))
clrs <- scales::hue_pal()(tmp)
tnbc_image <- plotPP(
data = TNBC_pheno[TNBC_pheno$Person == 2, c("cellx", "celly", "Phenotype")],
ptSize = 3, dropAxes = T, colorGuide = "none",
colors = clrs[-length(clrs)]
)
png(paste0(save_path,"/paper/tnbc_image.png"),
width = 800, height = 800)
print(tnbc_image)
dev.off()
sim_image <- plotPP(
data = dat[dat$replicate == 18, c("x", "y", "type")],
ptSize = 3, dropAxes = T, colorGuide = "none",
colors = c(clrs[1:2], clrs[16], clrs[3:7], clrs[15], clrs[8:14])
)
png(paste0(save_path,"/paper/sim_image.png"),
width = 800, height = 800)
print(sim_image)
dev.off()
```
Now we compute PCA and attach an age meta-variable (with no effect).
```{r simulateNoInteraction, eval=full_run}
cells <- paste0("c", 1:16)
cells_interactions <- rbind(
data.frame(t(combn(cells, 2))),
data.frame("X1" = cells, "X2" = cells)
)
set.seed(12345)
pcaData <- getKsPCAData(
data = dat, replicate = "replicate",
agents_df = cells_interactions,
xRange = c(0, 1), yRange = c(0, 1),
silent = F
)
pcaMeta <- simulateMeta(pcaData,
metaInfo = data.frame(
"var" = c("age"),
"rdist" = c("rnorm"),
"outcome_0" = c("25"),
"outcome_1" = c("25")
)
)
```
We analyze the data.
```{r analyzeNoEffect, eval=full_run}
set.seed(123)
rfcv <- funkyModel(
data = pcaMeta,
outcome = "outcome",
unit = "unit",
metaNames = c("age")
)
```
Resulting in the variable importance figures. The code for these is given below, but may not be shown due to computational time in creating this vignette.
```{r plot_noeffect_full, fig.width=5, fig.height=5, fig.cap="No Effect Variable Importance (All)", eval=full_run}
rfcv$viPlot
```
```{r plot_noeffect_top25, fig.width=5, fig.height=5, fig.cap="No Effect Variable Importance (Top 25)", eval=full_run}
rfcv$subset_viPlot
```
```{r paper_noEffect, echo=FALSE, eval=paperRun}
# Get Vars
viData <- rfcv$VariableImportance
accData <- rfcv$AccuracyEstimate
NoiseCutoff <- rfcv$NoiseCutoff
InterpolationCutoff <- rfcv$InterpolationCutoff
subsetPlotSize <- rfcv$AdditionalParams$subsetPlotSize
# Plot Full
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_full <- ggplot2::ggplot(
data = viData,
mapping = ggplot2::aes(
x = factor(stats::reorder(var, est)),
y = ifelse(est / maxVal > 1, 1,
ifelse(est / maxVal < 0, 0,
est / maxVal
)
),
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
),
color = "black", width = 0.2
) +
ggplot2::geom_point(color = "black", size = 3) +
ggplot2::geom_hline(
ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
color = "red", linetype = "dotted", linewidth = 2
) +
ggplot2::coord_flip(ylim = c(0, 1)) +
ggplot2::xlab(NULL) +
ggplot2::ylim(c(0, 1)) +
ggplot2::ylab(NULL) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 20)
) +
ggplot2::geom_line(ggplot2::aes(
x = ordered(viData[order(-est), "var"]),
y = InterpolationCutoff / maxVal
), color = "orange", linetype = "dashed", linewidth = 2) +
ggplot2::ylab(paste0(
"OOB (",
specify_decimal(accData$OOB, 2),
"), Guess (",
specify_decimal(accData$guess, 2),
"), Bias (",
specify_decimal(accData$bias, 2), ")"
))
png(paste0(save_path,"/paper/noeffect_variableimportance.png"))
print(plot_full)
dev.off()
# Plot Subset
InterpolationCutoff <- InterpolationCutoff[1:subsetPlotSize]
viData <- viData[order(-viData$est), ]
viData <- viData[1:subsetPlotSize, ]
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_top25 <- ggplot2::ggplot(
data = viData,
mapping = ggplot2::aes(
x = factor(stats::reorder(var, est)),
y = ifelse(est / maxVal > 1, 1,
ifelse(est / maxVal < 0, 0,
est / maxVal
)
),
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
),
color = "black", width = 0.2
) +
ggplot2::geom_point(color = "black", size = 3) +
ggplot2::geom_hline(
ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
color = "red", linetype = "dotted", linewidth = 2
) +
ggplot2::coord_flip(ylim = c(0, 1)) +
ggplot2::xlab(NULL) +
ggplot2::ylim(c(0, 1)) +
ggplot2::ylab(NULL) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text = ggplot2::element_text(size = 18),
axis.text.x = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 20)
) +
ggplot2::geom_line(ggplot2::aes(
x = ordered(viData[order(-est), "var"]),
y = InterpolationCutoff / maxVal
), color = "orange", linetype = "dashed", linewidth = 2) +
ggplot2::ylab(paste0(
"OOB (",
specify_decimal(accData$OOB, 2),
"), Guess (",
specify_decimal(accData$guess, 2),
"), Bias (",
specify_decimal(accData$bias, 2), ")"
))
png(paste0(save_path,"/paper/noeffect_variableimportance_top25.png"))
print(plot_top25)
dev.off()
```
Now we consider the simulated model with effects.
```{r effectSimulationAndModel, eval=full_run}
set.seed(123456)
dat0 <- simulatePP(
agentVarData =
data.frame(
"outcome" = c(0, 1),
"c1" = c(0, 0),
"c2" = c(1 / 25, 1 / 60), "c3" = c(1 / 50, 1 / 10),
"c4" = c(0, 0),
"c5" = c(0, 0), "c6" = c(0, 0),
"c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
"c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
"c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
"c13" = c(0, 0), "c14" = c(0, 0),
"c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
),
agentKappaData = data.frame(
"agent" = paste0("c", 1:16),
"clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
"kappa" = c(
rbinom(1, 100, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 30, 0.5),
rbinom(1, 80, 0.5),
rbinom(1, 350, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 120, 0.5),
rbinom(1, 150, 0.5),
rbinom(2, 250, 0.5),
rbinom(1, 600, 0.5),
rbinom(1, 60, 0.5),
rbinom(2, 140, 0.5),
rbinom(1, 20, 0.5),
rbinom(1, 5, 0.5)
)
),
unitsPerOutcome = 15,
replicatesPerUnit = 1,
silent = F
)
dat1 <- simulatePP(
agentVarData =
data.frame(
"outcome" = c(0, 1),
"c1" = c(0, 0),
"c2" = c(1 / 25, 1 / 25), "c3" = c(1 / 50, 1 / 50),
"c4" = c(0, 0),
"c5" = c(0, 0), "c6" = c(0, 0),
"c7" = c(0, 0), "c8" = c(1 / 100, 1 / 100),
"c9" = c(1 / 20, 1 / 20), "c10" = c(1 / 250, 1 / 250),
"c11" = c(1 / 100, 1 / 100), "c12" = c(1 / 80, 1 / 80),
"c13" = c(0, 0), "c14" = c(0, 0),
"c15" = c(0, 0), "c16" = c(1 / 10, 1 / 10)
),
agentKappaData = data.frame(
"agent" = paste0("c", 1:16),
"clusterAgent" = c(NA, "c1", "c1", rep(NA, 12), "c1"),
"kappa" = c(
rbinom(1, 100, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 30, 0.5),
rbinom(1, 80, 0.5),
rbinom(1, 350, 0.5),
rbinom(1, 100, 0.5),
rbinom(1, 120, 0.5),
rbinom(1, 150, 0.5),
rbinom(2, 250, 0.5),
rbinom(1, 600, 0.5),
rbinom(1, 60, 0.5),
rbinom(2, 140, 0.5),
rbinom(1, 20, 0.5),
rbinom(1, 5, 0.5)
)
),
unitsPerOutcome = 2,
replicatesPerUnit = 1,
silent = F
)
dat1$unit <- ifelse(dat1$unit == "u1", "u31",
ifelse(dat1$unit == "u2", "u32",
ifelse(dat1$unit == "u3", "u33",
ifelse(dat1$unit == "u4", "u34", NA)
)
)
)
dat1$replicate <- ifelse(dat1$replicate == "1", "31",
ifelse(dat1$replicate == "2", "32",
ifelse(dat1$replicate == "3", "33",
ifelse(dat1$replicate == "4", "34", NA)
)
)
)
dat <- rbind(dat0, dat1)
cells <- paste0("c", 1:16)
cells_interactions <- rbind(
data.frame(t(combn(cells, 2))),
data.frame("X1" = cells, "X2" = cells)
)
pcaData <- getKsPCAData(
data = dat, replicate = "replicate",
agents_df = cells_interactions,
xRange = c(0, 1), yRange = c(0, 1),
silent = F
)
pcaMeta <- simulateMeta(pcaData,
metaInfo = data.frame(
"var" = c("age"),
"rdist" = c("rnorm"),
"outcome_0" = c("25"),
"outcome_1" = c("27")
)
)
rfcv <- funkyModel(
data = pcaMeta,
outcome = "outcome",
unit = "unit",
metaNames = c("age")
)
```
Creating the variable importance plots as before (code below, perhaps figure not given for computational reasons)
```{r plot_effect_full, fig.width=5, fig.height=5, fig.cap="Effect Simulation Variable Importance (All)", eval=full_run}
rfcv$viPlot
```
```{r plot_effect_top25, fig.width=5, fig.height=5, fig.cap="Effect Simulation Variable Importance (Top 25)", eval=full_run}
rfcv$subset_viPlot
```
```{r paper_effect, echo=FALSE, eval=paperRun}
# Get Vars
viData <- rfcv$VariableImportance
accData <- rfcv$AccuracyEstimate
NoiseCutoff <- rfcv$NoiseCutoff
InterpolationCutoff <- rfcv$InterpolationCutoff
subsetPlotSize <- rfcv$AdditionalParams$subsetPlotSize
# Plot Full
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_full <- ggplot2::ggplot(
data = viData,
mapping = ggplot2::aes(
x = factor(stats::reorder(var, est)),
y = ifelse(est / maxVal > 1, 1,
ifelse(est / maxVal < 0, 0,
est / maxVal
)
),
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
),
color = "black", width = 0.2
) +
ggplot2::geom_point(color = "black", size = 3) +
ggplot2::geom_hline(
ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
color = "red", linetype = "dotted", linewidth = 2
) +
ggplot2::coord_flip(ylim = c(0, 1)) +
ggplot2::xlab(NULL) +
ggplot2::ylim(c(0, 1)) +
ggplot2::ylab(NULL) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 20)
) +
ggplot2::geom_line(ggplot2::aes(
x = ordered(viData[order(-est), "var"]),
y = InterpolationCutoff / maxVal
), color = "orange", linetype = "dashed", linewidth = 2) +
ggplot2::ylab(paste0(
"OOB (",
specify_decimal(accData$OOB, 2),
"), Guess (",
specify_decimal(accData$guess, 2),
"), Bias (",
specify_decimal(accData$bias, 2), ")"
))
png(paste0(save_path,"/paper/effect_variableimportance.png"))
print(plot_full)
dev.off()
# Plot Subset
InterpolationCutoff <- InterpolationCutoff[1:subsetPlotSize]
viData <- viData[order(-viData$est), ]
viData <- viData[1:subsetPlotSize, ]
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_top25 <- ggplot2::ggplot(
data = viData,
mapping = ggplot2::aes(
x = factor(stats::reorder(var, est)),
y = ifelse(est / maxVal > 1, 1,
ifelse(est / maxVal < 0, 0,
est / maxVal
)
),
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
),
color = "black", width = 0.2
) +
ggplot2::geom_point(color = "black", size = 3) +
ggplot2::geom_hline(
ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
color = "red", linetype = "dotted", linewidth = 2
) +
ggplot2::coord_flip(ylim = c(0, 1)) +
ggplot2::xlab(NULL) +
ggplot2::ylim(c(0, 1)) +
ggplot2::ylab(NULL) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text = ggplot2::element_text(size = 18),
axis.text.x = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 20)
) +
ggplot2::geom_line(ggplot2::aes(
x = ordered(viData[order(-est), "var"]),
y = InterpolationCutoff / maxVal
), color = "orange", linetype = "dashed", linewidth = 2) +
ggplot2::ylab(paste0(
"OOB (",
specify_decimal(accData$OOB, 2),
"), Guess (",
specify_decimal(accData$guess, 2),
"), Bias (",
specify_decimal(accData$bias, 2), ")"
))
png(paste0(save_path,"/paper/effect_variableimportance_top25.png"))
print(plot_top25)
dev.off()
```
With this confidence, we now consider the TNBC phenotype data.
```{r tnbc_pheno_model, eval=full_run}
dataPCA_pheno <- getKsPCAData(
data = TNBC_pheno, unit = "Person",
agents_df = pheno_interactions,
rCheckVals = seq(0, 50, 1)
)
dataPCAAge_pheno <- merge(dataPCA_pheno, TNBC_meta)
set.seed(123456)
rfcv <- funkyModel(
data = dataPCAAge_pheno, K = 10,
outcome = "Class", unit = "Person",
metaNames = c("Age"), synthetics = 100,
alpha = 0.05, silent = FALSE,
subsetPlotSize = 25
)
```
And the related variable importance plots (may not be shown for computational reasons).
```{r tnbc_full, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Variable Importance (All)", eval=full_run}
rfcv$viPlot
```
```{r tnbc_top25, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Variable Importance (Top 25)", eval=full_run}
rfcv$subset_viPlot
```
```{r paper_tnbc_variable_importance, echo=FALSE, eval=paperRun}
# Get Vars
viData <- rfcv$VariableImportance
accData <- rfcv$AccuracyEstimate
NoiseCutoff <- rfcv$NoiseCutoff
InterpolationCutoff <- rfcv$InterpolationCutoff
subsetPlotSize <- rfcv$AdditionalParams$subsetPlotSize
# Plot Full
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_full <- ggplot2::ggplot(
data = viData,
mapping = ggplot2::aes(
x = factor(stats::reorder(var, est)),
y = ifelse(est / maxVal > 1, 1,
ifelse(est / maxVal < 0, 0,
est / maxVal
)
),
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
),
color = "black", width = 0.2
) +
ggplot2::geom_point(color = "black", size = 3) +
ggplot2::geom_hline(
ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
color = "red", linetype = "dotted", linewidth = 2
) +
ggplot2::coord_flip(ylim = c(0, 1)) +
ggplot2::xlab(NULL) +
ggplot2::ylim(c(0, 1)) +
ggplot2::ylab(NULL) +
ggplot2::theme_bw() +
ggplot2::theme(
# axis.text = ggplot2::element_text(size = 18),
axis.text = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 20)
) +
ggplot2::geom_line(ggplot2::aes(
x = ordered(viData[order(-est), "var"]),
y = InterpolationCutoff / maxVal
), color = "orange", linetype = "dashed", linewidth = 2) +
ggplot2::ylab(paste0(
"OOB (",
specify_decimal(accData$OOB, 2),
"), Guess (",
specify_decimal(accData$guess, 2),
"), Bias (",
specify_decimal(accData$bias, 2), ")"
))
png(paste0(save_path,"/paper/tnbc_variableimportance.png"))
print(plot_full)
dev.off()
# Plot Subset
InterpolationCutoff <- InterpolationCutoff[1:subsetPlotSize]
viData <- viData[order(-viData$est), ]
viData <- viData[1:subsetPlotSize, ]
maxVal <- max(InterpolationCutoff, NoiseCutoff, viData$est)
plot_top25 <- ggplot2::ggplot(
data = viData,
mapping = ggplot2::aes(
x = factor(stats::reorder(var, est)),
y = ifelse(est / maxVal > 1, 1,
ifelse(est / maxVal < 0, 0,
est / maxVal
)
),
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = ifelse((est - sd) / maxVal < 0, 0, (est - sd) / maxVal),
ymax = ifelse((est + sd) / maxVal > 1, 1, (est + sd) / maxVal)
),
color = "black", width = 0.2
) +
ggplot2::geom_point(color = "black", size = 3) +
ggplot2::geom_hline(
ggplot2::aes(yintercept = max(0, min(1, NoiseCutoff / maxVal))),
color = "red", linetype = "dotted", linewidth = 2
) +
ggplot2::coord_flip(ylim = c(0, 1)) +
ggplot2::xlab(NULL) +
ggplot2::ylim(c(0, 1)) +
ggplot2::ylab(NULL) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text.x = ggplot2::element_blank(),
axis.text.y = ggplot2::element_text(size = 14),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(size = 17)
) +
ggplot2::geom_line(ggplot2::aes(
x = ordered(viData[order(-est), "var"]),
y = InterpolationCutoff / maxVal
), color = "orange", linetype = "dashed", linewidth = 2) +
ggplot2::ylab(paste0(
"OOB (",
specify_decimal(accData$OOB, 2),
"), Guess (",
specify_decimal(accData$guess, 2),
"), Bias (",
specify_decimal(accData$bias, 2), ")"
))
png(paste0(save_path,"/paper/tnbc_variableimportance_top25.png"))
print(plot_top25)
dev.off()
```
Also consider $K$ functions from significant and insignificant interactions.
```{r significantTNBCK}
tmp <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 0, -1],
c("Tumor", "Tumor"),
unit = "Person",
rCheckVals = seq(0, 50, 1)
)
tmp1 <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 1, -1],
c("Tumor", "Tumor"),
unit = "Person",
rCheckVals = seq(0, 50, 1)
)
tmp_1 <- tidyr::pivot_longer(data = tmp, cols = -r)
tmp1_1 <- tidyr::pivot_longer(data = tmp1, cols = -r)
data_plot <- rbind(
data.frame(
"r" = tmp_1$r,
"K" = tmp_1$value,
"unit" = tmp_1$name,
"outcome" = "0"
),
data.frame(
"r" = tmp1_1$r,
"K" = tmp1_1$value,
"unit" = paste0(tmp1_1$name, "_1"),
"outcome" = "1"
)
)
```
Creating the following figure.
```{r significantTNBCKPlot, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Significant K Function"}
plot_K_functions(data_plot)
```
```{r paper_sig_K_functions, echo=FALSE, eval=paperRun}
data <- data_plot
# Prep Legend
info <- unique(data[, c("unit", "outcome")])
info$Missing <- NA
for (i in 1:nrow(info)) {
info[i, 3] <- nrow(data[data$unit == info$unit[i] &
!stats::complete.cases(data), ]) > 0
}
info_labels <- data.frame(
"outcome" = unique(data$outcome),
"units" = NA,
"Missing" = NA,
"Label" = NA
)
for (i in 1:nrow(info_labels)) {
info_labels[i, 2] <- nrow(info[info$outcome == info_labels[i, 1], ])
info_labels[i, 3] <- nrow(info[info$outcome == info_labels[i, 1] &
info$Missing, ])
# Only label outcomes with numbers if at least one is missing in whole set
if (sum(info$Missing) != 0) {
info_labels[i, 4] <- paste0(
info_labels[i, 1], " (",
info_labels[i, 2] - info_labels[i, 3], "/",
info_labels[i, 2], ")"
)
} else {
info_labels[i, 4] <- info_labels[i, 1]
}
}
# Build Averages
data_wide <- tidyr::pivot_wider(data,
names_from = "unit",
values_from = "K"
)
data_avg <- data.frame("r" = unique(data_wide$r))
for (i in 1:nrow(info_labels)) {
data_avg[[info_labels[i, "outcome"]]] <-
rowMeans(data_wide[data_wide$outcome == info_labels[i, "outcome"], -c(1:2)],
na.rm = TRUE
)
}
data_avg <- tidyr::pivot_longer(data_avg, cols = -r)
colnames(data_avg) <- c("r", "outcome", "Value")
# Plot
return_plot <-
ggplot2::ggplot(
data = stats::na.omit(data),
ggplot2::aes(
x = r, y = K,
group = unit, color = outcome
)
) +
ggplot2::geom_line(alpha = 0.2, linewidth = 1.25) +
ggplot2::geom_line(
ggplot2::aes(
x = r, y = Value,
group = outcome, color = outcome
),
data = data_avg, linewidth = 2
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text = ggplot2::element_text(size = 18),
axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
# axis.title = ggplot2::element_text(size = 22),
axis.title = ggplot2::element_blank(),
legend.position = "none"
) # +
# ggplot2::geom_line(ggplot2::aes(x = r, y = pi * r^2,
# group = outcome),
# data = data_avg, linewidth = 1.25,
# color = "black", linetype = "dashed"
# )
png(paste0(save_path,"/paper/TNBC_sig_KFunctions.png"))
print(return_plot)
dev.off()
```
And for a insignificant interaction.
```{r insignificantTNBCK}
tmp <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 0, -1],
c("CD4T", "Endothelial"),
unit = "Person",
rCheckVals = seq(0, 50, 1)
)
tmp1 <- getKFunction(TNBC_pheno[TNBC_pheno$Class == 1, -1],
c("CD4T", "Endothelial"),
unit = "Person",
rCheckVals = seq(0, 50, 1)
)
tmp_1 <- tidyr::pivot_longer(data = tmp, cols = -r)
tmp1_1 <- tidyr::pivot_longer(data = tmp1, cols = -r)
data_plot <- rbind(
data.frame(
"r" = tmp_1$r,
"K" = tmp_1$value,
"unit" = tmp_1$name,
"outcome" = "0"
),
data.frame(
"r" = tmp1_1$r,
"K" = tmp1_1$value,
"unit" = paste0(tmp1_1$name, "_1"),
"outcome" = "1"
)
)
```
Creating the following figure.
```{r insignificantTNBCKPlot, fig.width=5, fig.height=5, fig.cap="TNBC Phenotype Insignficant K Function"}
plot_K_functions(data_plot)
```
```{r paper_insig_K_functions, echo=FALSE, eval=paperRun}
data <- data_plot
# Prep Legend
info <- unique(data[, c("unit", "outcome")])
info$Missing <- NA
for (i in 1:nrow(info)) {
info[i, 3] <- nrow(data[data$unit == info$unit[i] &
!stats::complete.cases(data), ]) > 0
}
info_labels <- data.frame(
"outcome" = unique(data$outcome),
"units" = NA,
"Missing" = NA,
"Label" = NA
)
for (i in 1:nrow(info_labels)) {
info_labels[i, 2] <- nrow(info[info$outcome == info_labels[i, 1], ])
info_labels[i, 3] <- nrow(info[info$outcome == info_labels[i, 1] &
info$Missing, ])
# Only label outcomes with numbers if at least one is missing in whole set
if (sum(info$Missing) != 0) {
info_labels[i, 4] <- paste0(
info_labels[i, 1], " (",
info_labels[i, 2] - info_labels[i, 3], "/",
info_labels[i, 2], ")"
)
} else {
info_labels[i, 4] <- info_labels[i, 1]
}
}
# Build Averages
data_wide <- tidyr::pivot_wider(data,
names_from = "unit",
values_from = "K"
)
data_avg <- data.frame("r" = unique(data_wide$r))
for (i in 1:nrow(info_labels)) {
data_avg[[info_labels[i, "outcome"]]] <-
rowMeans(data_wide[data_wide$outcome == info_labels[i, "outcome"], -c(1:2)],
na.rm = TRUE
)
}
data_avg <- tidyr::pivot_longer(data_avg, cols = -r)
colnames(data_avg) <- c("r", "outcome", "Value")
# Plot
return_plot <-
ggplot2::ggplot(
data = stats::na.omit(data),
ggplot2::aes(
x = r, y = K,
group = unit, color = outcome
)
) +
ggplot2::geom_line(alpha = 0.2, linewidth = 1.25) +
ggplot2::geom_line(
ggplot2::aes(
x = r, y = Value,
group = outcome, color = outcome
),
data = data_avg, linewidth = 2
) +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text = ggplot2::element_text(size = 18),
axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
# axis.title = ggplot2::element_text(size = 22),
axis.title = ggplot2::element_blank(),
legend.position = "none"
) # +
# ggplot2::geom_line(ggplot2::aes(x = r, y = pi * r^2,
# group = outcome),
# data = data_avg, linewidth = 1.25,
# color = "black", linetype = "dashed"
# )
png(paste0(save_path,"/paper/TNBC_insig_KFunctions.png"))
print(return_plot)
dev.off()
```
Further analysis of this data can be found in our paper.