This report compares inferred BRCA ATAC-seq peaks linked to upregulated distal genes to ELMER (Enhancer Linking by Methylation/Expression Relationships) inferred gene-DNA methylation probe linkages. The ELMER supervised analysis was performed comparing all different molecular subtypes (Luminal, Basal, Her2, Normal-like) and all code used to analyze the data is shown in section “ELMER supervised analysis complete code”. ELMER is available at Bioconductor (http://bioconductor.org/packages/ELMER/).
This section compares ELMER TCGA-BRCA and ATAC-seq inference linkages of loci (DNA methylation loss and ATAC-seq peak) and distal gene. The following subsections will perform the comparison step by step.
atac_peak_linkage <- readr::read_tsv("~/paper_elmer/atac-seq/BRCA-atac-seq/BRCA_ATAC_Peak_Linage.txt",
col_types = "cdddcdcdcdddccdddc")
atac_peak_linkage <- atac_peak_linkage[atac_peak_linkage$cor > 0,] # keep only positive associations (atac-seq peak and higher gene expression)
atac_peak_linkage
probes.metadata.file <- "http://zwdzwd.io/InfiniumAnnotation/current/hm450/hm450.hg38.manifest.rds"
if(!file.exists(basename(probes.metadata.file))) downloader::download(probes.metadata.file,basename(probes.metadata.file))
hm450.hg38.manifest <- readRDS("hm450.hg38.manifest.rds")
probes.metadata <- as_tibble(hm450.hg38.manifest)
probes.metadata$Probe <- names(hm450.hg38.manifest)
probes.metadata
hits <- suppressWarnings(findOverlaps(atac_peak_linkage.gr,hm450.hg38.manifest))
atac_peak_linkage.gr$overlap_450K_probe <- "No"
overlaped <- unique(queryHits(hits))
atac_peak_linkage.gr$overlap_450K_probe[overlaped] <- "yes"
distal.probes <- ELMER::get.feature.probe(feature = NULL,
genome = "hg38",
met.platform = "450K")
hits.distal <- suppressWarnings(findOverlaps(atac_peak_linkage.gr,distal.probes))
atac_peak_linkage.gr$overlaps.distal.probes <- "No"
atac_peak_linkage.gr$overlaps.distal.probes[unique(queryHits(hits.distal))] <- "Yes"
# Since ELMER uses a 500bp window for motif search, we will consider this window in our analysis to compare with ATAC-seq peaks
distal_probes_extended_500bp <- resize(distal.probes,width = 501,fix = "center")
hits.distal.extended <- suppressWarnings(findOverlaps(atac_peak_linkage.gr,distal_probes_extended_500bp))
atac_peak_linkage.gr$overlaps.distal.probes.extend <- "No"
atac_peak_linkage.gr$overlaps.distal.probes.extend[unique(queryHits(hits.distal.extended))] <- "Yes"
data.frame(overlapping_atac_seq_peaks = c(length(unique(queryHits(hits))),
length(unique(queryHits(hits.distal))),
length(unique(queryHits(hits.distal.extended)))),
overlapping_probes = c(length(unique(subjectHits(hits))),
length(unique(subjectHits(hits.distal))),
length(unique(subjectHits(hits.distal.extended)))),
nb_probes = c(length(hm450.hg38.manifest),length(distal.probes),length(distal.probes)),
percentage_probes = c(paste0(format(round(100*length(unique(subjectHits(hits)))/length(hm450.hg38.manifest), 2), nsmall = 2),"%"),
paste0(format(round(100*length(unique(subjectHits(hits.distal)))/length(distal.probes), 2), nsmall = 2),"%"),
paste0(format(round(100*length(unique(subjectHits(hits.distal.extended)))/length(distal.probes), 2), nsmall = 2),"%")),
row.names = c("450K probes","Distal probes (+-2KB away from TSS)","Distal probes extended (+- 250bp)"))
Since our ATAC-seq links use gene symbols we will recover its ENSEMBL gene ID (from GENCODE v22) for comparison with ELMER links.
library(limma)
hits <- findOverlaps(atac_peak_linkage.gr,distal_probes_extended_500bp)
peaks.overlapping.probes <- atac_peak_linkage.gr[queryHits(hits)]
peaks.overlapping.probes$Probe <- names(distal_probes_extended_500bp[subjectHits(hits)])
peaks.overlapping.probes$Peak_id <- paste(seqnames(peaks.overlapping.probes),
start(peaks.overlapping.probes),
end(peaks.overlapping.probes),
peaks.overlapping.probes$linkedGene,sep = ".")
pairs <- tibble::as.tibble(peaks.overlapping.probes)
pairs <- pairs[,c("Probe","linkedGene","Peak_id")]
gene.info <- TCGAbiolinks:::get.GRCh.bioMart(genome = "hg38")
pairs <- merge(pairs,gene.info,by.x = "linkedGene",by.y = "external_gene_name",all.x = TRUE)
colnames(pairs)[8] <- "GeneID"
idx <- which(is.na(pairs$GeneID))
x <- adply(unique(pairs$linkedGene[idx]),1,
function(x) {
ret <- alias2Symbol(x); ifelse(stringr::str_length(ret[1]) ==0,NA,ret[which(ret %in% gene.info$external_gene_name)])
})
x$X1 <- unique(pairs$linkedGene[idx])
x<- x[!is.na(x$V1),]
pairs[pairs$linkedGene %in% x$X1,]$linkedGene <- x$V1[match(pairs$linkedGene[pairs$linkedGene %in% x$X1],x$X1)]
# GENCODE v22
library(biomaRt)
ensembl <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
host = "http://mar2016.archive.ensembl.org",
path = "/biomart/martservice" ,
dataset = "hsapiens_gene_ensembl")
attributes <- c("chromosome_name",
"start_position",
"end_position", "strand",
"ensembl_gene_id",
"transcription_start_site",
"transcript_start",
"ensembl_transcript_id",
"external_transcript_name",
"transcript_end",
"external_gene_name")
tss <- getBM(attributes = attributes, mart = ensembl)
idx <- which(is.na(pairs$GeneID))
tss$ensembl_gene_id[match(pairs$linkedGene[idx],tss$external_gene_name)]
pairs$GeneID[idx] <- tss$ensembl_gene_id[match(pairs$linkedGene[idx],tss$external_gene_name)]
idx <- is.na(pairs$GeneID)
update <- alias2SymbolTable(pairs[idx,]$linkedGene)
pairs$linkedGene[idx[which(!is.na(update))]] <- update[!is.na(update)]
pairs[pairs$linkedGene == "GPR125","GeneID"] <- "ENSG00000152990"
pairs[pairs$linkedGene == "TMEM194B","GeneID"] <- "ENSG00000189362"
atac_peak_linkage.gr.ov.450k <- subsetByOverlaps(atac_peak_linkage.gr,distal_probes_extended_500bp)
atac_peak_linkage.gr.ov.450k$GeneID <- pairs$GeneID[match(atac_peak_linkage.gr.ov.450k$linkedGene,pairs$linkedGene)]
save(pairs,atac_peak_linkage.gr.ov.450k,file = "updated_BRCA_peaks_geneID_genenames_.rda")
The randomized approach is described below:
library(plyr)
library(ggthemes)
library(ggplot2)
library(ELMER)
library(readr)
library(plyr)
library(dplyr)
set.seed(123456)
probes.metadata.file <- "http://zwdzwd.io/InfiniumAnnotation/current/hm450/hm450.hg38.manifest.rds"
if(!file.exists(basename(probes.metadata.file))) downloader::download(probes.metadata.file,basename(probes.metadata.file))
hm450.hg38.manifest <- readRDS("hm450.hg38.manifest.rds")
probes.metadata <- as_tibble(hm450.hg38.manifest)
probes.metadata$Probe <- names(hm450.hg38.manifest)
probes.metadata
pair.files <- dir("~/paper_elmer/BRCA_supervised_hg38_sesame_metdiff_015_fdr_n4_pairfdr_n9/",
pattern = ".*.pairs.significant.csv",
full.names = T,
recursive = T,
all.files = T)
analysis <- file.path(basename(dirname(dirname(pair.files))), basename(dirname(pair.files)))
all.pairs <- NULL
all.pairs <- plyr::adply(.data = pair.files,1, function(f) read_csv(f,col_types = readr::cols()),.id = NULL)
all.pairs$id <- paste0(all.pairs$Probe,all.pairs$GeneID)
pairs.with.metadata <- left_join(all.pairs,probes.metadata)
# remove pairs already counted some pairs mught appear in several analysis we don't want to recount them
sig.pairs <- pairs.with.metadata[!duplicated(pairs.with.metadata$id),]
dir.create("random", showWarnings = FALSE)
for(i in 1:20){
col <- paste0("sesame_random_metdiff_015_fdr_n4_pairfdr_n9")
random.pairs <- ELMER::getRandomPairs(sig.pairs,
genome = "hg38",
met.platform = "450K",
cores = 6)
save(random.pairs,file = paste0("random/",col,"_",i,".rda"))
}
For each performed ELMER analysis, we will compare if an ELMER linkage (probe hypomethylated linked to a distal gene highly expressed) is also identified by the ATAC-seq linkages (ATAC-seq peak linked to a distal gene highly expressed).
load("~/paper_elmer/atac-seq/BRCA-atac-seq/updated_BRCA_peaks_geneID_genenames_.rda")
probes.overlap.all.merged <- NULL
all.pairs <- NULL
# Read ELMER linkage results
pair.files <- dir("~/paper_elmer//BRCA_supervised_hg38_sesame_metdiff_015_fdr_n4_pairfdr_n9/",
pattern = ".*.pairs.significant.csv", full.names = T,recursive = T,all.files = T)
analysis <- file.path(basename(dirname(dirname(pair.files))), basename(dirname(pair.files)))
all.pairs <- plyr::adply(.data = pair.files,1,
function(f) {
ret <- read_csv(f,col_types = readr::cols())
ret$analysis <- file.path(basename(dirname(dirname(f))), basename(dirname(f)))
ret
},.id = NULL)
pairs.with.metadata <- left_join(all.pairs,probes.metadata)
# ELMER chosen cut-off
atac.fdr <- c(0.01)
probe.window <- c(500)
overlapping_elmer_links<- NULL
for(a in unique(pairs.with.metadata$analysis)){
for(w in probe.window){
for(k in atac.fdr){
col <- paste0(a,"metdiff_01_metfdr_001_pairfdr_10_minus_8")
pairs.gr <- makeGRangesFromDataFrame(pairs.with.metadata[pairs.with.metadata$analysis == a,],keep.extra.columns = T)
# Check overlap between inferred probes regions and atac-seq regions
probes_extended_500bp <- pairs.gr
probes_extended_500bp <- resize(pairs.gr,width = w + 1,fix = "center")
hits <- suppressWarnings(findOverlaps(atac_peak_linkage.gr.ov.450k,probes_extended_500bp))
peaks.hit <- unique(queryHits(hits))
idx <- which(atac_peak_linkage.gr.ov.450k[queryHits(hits)]$GeneID == probes_extended_500bp[subjectHits(hits)]$GeneID)
values(atac_peak_linkage.gr.ov.450k)[col] <- 0
values(atac_peak_linkage.gr.ov.450k)[[col]][queryHits(hits)[idx]] <- 1
overlapping_elmer_links <- c(overlapping_elmer_links,
list(
col = paste0(probes_extended_500bp[subjectHits(hits)[idx]]$Probe,
".",
probes_extended_500bp[subjectHits(hits)[idx]]$GeneID))
)
}
}
}
names(overlapping_elmer_links) <- unique(pairs.with.metadata$analysis)
aux <- as.data.frame(atac_peak_linkage.gr.ov.450k)
aux$id <- paste0(aux$seqnames,":",aux$start,"-",aux$end)
matrix <- aux[,grep("metdiff_01_metfdr_001_pairfdr_10_minus_8",colnames(aux))]
rownames(matrix) <- paste0("peak_id_",aux$id,"_gene_", aux$linkedGene)
colnames(matrix) <- gsub("metdiff_01_metfdr_001_pairfdr_10_minus_8","",colnames(matrix))
colnames(matrix) <- gsub("\\.hypo|\\.hyper","",sapply(colnames(matrix),
function(x) {
ifelse(grepl("hypo",x),
paste0(unlist(strsplit(x,"_"))[1]," (vs ",
unlist(strsplit(x,"_"))[2],")"),
paste0(unlist(strsplit(x,"_"))[2]," (vs ",
unlist(strsplit(x,"_"))[1],")")
)
}
)
)
#save(matrix,file = "overlap_matrix.rda")
matrix <- data.matrix(matrix)
#save(matrix,file = "overlap_matrix_binary.rda")
library(iheatmapr)
non_zero <- matrix[rowSums(matrix) > 0,]
# overlapping pairs
df <- data.frame("ATAC-seq links overlapping ELMER links" = nrow(non_zero))
df
hm <- main_heatmap(non_zero,name = "Overlap ?") %>%
add_row_clustering() %>%
add_col_clustering() %>%
add_col_groups(gsub("metdiff_01_metfdr_001_pairfdr_10_minus_8","",colnames(matrix)),
side = "bottom", name = "Tumor type",
title = "Tumor type") %>%
#add_col_labels(ticktext = gsub("met.diff_0.1_met.fdr_0.05_pair.fdr_0.005","",colnames(matrix)),font = list(size = 12)) %>%
add_col_title(paste0("ATAC-seq pairs overlapping BRCA ELMER anti-correlated paired gene-probes (n=",nrow(non_zero),")"), side= "top",font = list(size = 16))
order <- dist(matrix, method = "euclidean") %>% hclust(method="ward")
hm2 <- main_heatmap(matrix,name = "Overlap ?", row_order=order$order) %>%
add_col_clustering() %>%
add_col_groups(gsub("metdiff_01_metfdr_001_pairfdr_10_minus_8","",colnames(matrix)),
side = "bottom", name = "Tumor type",
title = "Tumor type") %>%
add_row_title(paste0("ATAC-seq pairs overlapping distal probes (n=",nrow(matrix),")"), side= "left",font = list(size = 16)) %>%
#add_col_labels(ticktext = gsub("met.diff_0.1_met.fdr_0.05_pair.fdr_0.005","",colnames(matrix)),font = list(size = 12)) %>%
add_col_title(paste0("ATAC-seq peaks overlapping ELMER paired gene-probes"), side= "top",font = list(size = 16))
hm
The following code compares ELMER inference to ATAC-seq linkages.
probes.overlap.all.merged <- NULL
all.pairs <- NULL
pair.files <- dir("~/paper_elmer//BRCA_supervised_hg38_sesame_metdiff_015_fdr_n4_pairfdr_n9///",
pattern = ".*.pairs.significant.csv", full.names = T,recursive = T,all.files = T)
analysis <- file.path(basename(dirname(dirname(pair.files))), basename(dirname(pair.files)))
all.pairs <- plyr::adply(.data = pair.files,1, function(f) {
ret <- read_csv(f,col_types = readr::cols())
ret$analysis <- file.path(basename(dirname(dirname(f))), basename(dirname(f)))
ret
},
.id = NULL)
all.pairs$id <- paste0(all.pairs$Probe,all.pairs$GeneID)
pairs.with.metadata <- left_join(all.pairs,probes.metadata)
atac.fdr <- c(0.01)
probe.window <- c(500)
combinations <- expand.grid(atac.fdr,probe.window,stringsAsFactors = FALSE)
colnames(combinations) <- c("atac.fdr","probe.window")
probes.overlap.all.merged <- adply(combinations,1,function(x){
sig.pairs <- pairs.with.metadata
sig.pairs <- sig.pairs[!duplicated(sig.pairs$id),] # remove pairs already counted some pairs mught appear in several analysis we don't want to recount them
pairs.gr <- makeGRangesFromDataFrame(sig.pairs,keep.extra.columns = T)
# Check overlap between inferred probes regions and atac-seq regions
probes_extended <- resize(pairs.gr,width = x$probe.window + 1,fix = "center")
hits <- suppressWarnings(findOverlaps(atac_peak_linkage.gr.ov.450k,probes_extended))
peaks.hit <- unique(queryHits(hits))
# get the line of the hits that also have the same gene
idx <- which(atac_peak_linkage.gr.ov.450k[queryHits(hits)]$GeneID == probes_extended[subjectHits(hits)]$GeneID)
matched.interaction <- unique(queryHits(hits)[idx])
idx <- which(atac_peak_linkage.gr.ov.450k[queryHits(hits)]$linkedGene == pairs.gr[subjectHits(hits)]$Symbol)
matched.interaction.symbol <- unique(queryHits(hits)[idx])
data.frame(
pairs_overlapping = length(matched.interaction),
pairs_overlapping.symbol = length(matched.interaction.symbol),
nb_atac_seq_pairs = length(atac_peak_linkage.gr),
nb_atac_seq_pairs_overlapping_distal = length(atac_peak_linkage.gr.ov.450k),
nb_paired_probes = length(unique(pairs.gr$Probe)),
nb_paired_probes_genes = length(pairs.gr))
})
probes.overlap.all.merged <- cbind("analysis"="ELMER",probes.overlap.all.merged)
probes.overlap.all.merged[,-c(2:3)]
peaks_overlap <- probes.overlap.all.merged %>%
group_by(analysis,add = TRUE) %>%
dplyr::summarise(
n = n(),
max = max(pairs_overlapping),
min = min(pairs_overlapping),
mean=mean(pairs_overlapping),
sd=sd(pairs_overlapping)
) %>%
mutate( se=sd/sqrt(n)) %>%
mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
The following code compares 20 random evaluations to ATAC-seq linkages.
probes.overlap.all <- NULL
all.pairs <- NULL
files <- dir(path = "~/paper_elmer/atac-seq/BRCA-atac-seq/random",
pattern = "sesame_random_metdiff_015_fdr_n4_pairfdr_n9",
recursive = F,
full.names = T,
ignore.case = T)
probes.overlap.all <- adply(files,.margins = 1,.fun = function(f){
pairs <- get(load(f))
tryCatch({
pairs.gr <- makeGRangesFromDataFrame(pairs,
seqnames.field = "probe_seqnames",
start.field = "probe_start",
end.field = "probe_end",
strand.field = "probe_strand",
keep.extra.columns = T)
# Check overlap between inferred probes regions and atac-seq regions
probes_extended_500bp <- resize(pairs.gr,width = 501,fix = "center")
hits <- suppressWarnings(findOverlaps(atac_peak_linkage.gr.ov.450k,probes_extended_500bp))
peaks.hit <- unique(queryHits(hits))
idx <- which(atac_peak_linkage.gr.ov.450k[queryHits(hits)]$GeneID == pairs.gr[subjectHits(hits)]$GeneID)
matched.interaction <- unique(queryHits(hits)[idx])
idx <- which(atac_peak_linkage.gr.ov.450k[queryHits(hits)]$linkedGene == pairs.gr[subjectHits(hits)]$Symbol)
matched.interaction.symbol <- unique(queryHits(hits)[idx])
col <- paste0(gsub("\\.rda","",basename(f)))
data.frame(analysis = "ELMER BRCA merged",
row.names = col,
pairs_overlapping = length(matched.interaction),
pairs_overlapping.symbol = length(matched.interaction.symbol),
nb_paired_probes_genes = length(pairs.gr)
)
})
})
probes.overlap.all
library(dplyr)
library(ggthemes)
probes.overlap.all$analysis <- paste0(probes.overlap.all$analysis,"_random")
random_peaks_overlap <- probes.overlap.all %>%
group_by(analysis,add = TRUE) %>%
dplyr::summarise(
n = n(),
max = max(pairs_overlapping),
min = min(pairs_overlapping),
mean=mean(pairs_overlapping),
sd=sd(pairs_overlapping)
) %>%
mutate( se=sd/sqrt(n)) %>%
mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
aux <- rbind(peaks_overlap,random_peaks_overlap)
aux$type <- "ELMER"
aux$type[grep("random",aux$analysis)] <- "Random"
aux$tumor <- "All BRCA merged"
aux
aux2 <- aux
aux2$mean <- aux2$mean/1328
aux2[1,which(is.na(aux2[1,]))] <- 0
aux2$tumor <- factor(aux2$tumor,levels = sort(unique(aux2$tumor),decreasing = TRUE))
aux2$type[1] <- "BRCA meth-genes links (ELMER)"
aux2$type[2] <- "Randomized ELMER links"
p <- plot_ly(data = aux2[which(aux2$analysis == 'ELMER'),], x = ~type, y = ~mean * 100, type = 'bar', name = 'BRCA meth-genes links (ELMER)') %>%
add_trace(data = aux2[which(aux2$analysis != 'ELMER'),], name = 'Randomized ELMER links', error_y = ~list(value = sd , color = '#000000')) %>%
layout(xaxis = list(
title = "",
showticklabels = FALSE),
legend = list(orientation = 'h'),
yaxis = list(
title = "Percent of Links Overlapping ATAC-seq Peak-to-Gene Links"))
p
The DNA methylation, RNA-seq are available as a MultiAsayExperiment while ATAC-seq as a SummarizedExperiment.
For each peak, we will take the average of the DNA methylation of overlapping probes and the linked gene expression (\(log_2(FPKM-UQ + 1)\)). If the gene is absent it will be set to NA. Also, since the ATAC-seq data has replicates, for each sample, the mean signal will be considered.
meth <- assay(getMet(mae))
registerDoParallel(4)
# DNA methylation
hits <- as.data.frame(findOverlaps(atac_peak_linkage.gr.ov.450k,distal_probes_extended_500bp))
avg.mean <- adply(unique(hits$queryHits), .margins = 1, .fun = function(x,df){
p <- names(distal_probes_extended_500bp)[df[df$queryHits == x,]$subjectHits]
ret <- colMeans(assay(getMet(mae))[p,,drop = FALSE])
ret
},.progress = "text",df = hits,.parallel = FALSE)
avg.mean$X1 <- NULL
rownames(avg.mean) <- paste0(atac_peak_linkage.gr.ov.450k$id_peak,"_",atac_peak_linkage.gr.ov.450k$linkedGene)
# Expression Matrix
expression <- getExp(mae)
expression.values <- adply(atac_peak_linkage.gr.ov.450k$GeneID,1, function(x){
if(!is.na(x) & x %in% rownames(expression)){
ret <- assay(expression)[x,]
} else {
ret <- rep(NA,ncol(expression))
}
ret
},.progress = "text")
expression.values$X1 <- NULL
# ATAC-seq data
atac.se <- read_rds("~/Downloads/TCGA_BRCA_ONLY_SE.rds")
atac.samples <- colData(atac.se)$submitter_id
metadata.atac.se <- as_tibble(rowRanges(atac.se))
metadata.atac.se$id_peak <- paste0("peak_id_",
as.character(metadata.atac.se$seqnames),
":",
metadata.atac.se$start-1,
"-",
metadata.atac.se$end)
atac_peak_linkage.gr.ov.450k$id_peak <- paste0("peak_id_",
as.character(seqnames(atac_peak_linkage.gr.ov.450k)),
":",
start(atac_peak_linkage.gr.ov.450k),
"-",
end(atac_peak_linkage.gr.ov.450k))
groupMeans <- function(mat, groups=NULL, na.rm = TRUE){
stopifnot(!is.null(groups))
gm <- lapply(unique(groups), function(x){
rowMeans(mat[,which(groups==x),drop=F], na.rm=na.rm)
}) %>% Reduce("cbind",.)
colnames(gm) <- unique(groups)
return(gm)
}
matMerged <- groupMeans(mat = assays(atac.se)$log2norm, groups=colData(atac.se)$submitter_id)
atac.values <- matMerged[match(as.character(atac_peak_linkage.gr.ov.450k$id_peak),as.character(metadata.atac.se$id_peak)),]
peak.score <- metadata.atac.se[match(as.character(atac_peak_linkage.gr.ov.450k$id_peak),as.character(metadata.atac.se$id_peak)),"score"]
save(avg.mean,
expression.values,
matMerged,
atac.values,
peak.score,
atac_peak_linkage.gr.ov.450k,
distal_probes_extended_500bp,
file = "~/paper_elmer/atac-seq/BRCA-atac-seq/heatmap_data.rda")
# 1) For RNA Heatmap
pal_rna <- colorRampPalette(c("#352A86","#343DAE","#0262E0","#1389D2",
"#2DB7A3","#A5BE6A","#F8BA43","#F6DA23","#F8FA0D"))(100)
# 2) For Methylation Heatmap
pal_methylation <- colorRampPalette(c("#000436","#021EA9","#1632FB",
"#6E34FC","#C732D5","#FD619D",
"#FF9965","#FFD32B","#FFFC5A"))(100)
# 3) For ATAC Heatmap
pal_atac <- colorRampPalette(c('#3361A5', '#248AF3', '#14B3FF',
'#88CEEF', '#C1D5DC', '#EAD397',
'#FDB31A','#E42A2A', '#A31D1D'))(100)
# 4) For ATAC Bigwig Heatmap style
pal_atac_bw_heatmap <- colorRampPalette(c("white","#2488F0","#7F3F98",
"#E22929","#FCB31A"))(100)
The rows will be ordered first with the ELMER positive results (those also found in ATAC-seq analysis), which will be clustered by jaccard distance, followed by all ELMER negative results, which will be clustered based on the DNA methylation euclidean distance.
# ELMER positive rows
idx.ov <- which(rowSums(matrix) > 0)
order.rows.ov <- vegan::vegdist(matrix[idx.ov,],method="jaccard") %>% hclust(method="ward") # cluster rows based on overlap matrix
# ELMER negative rows
idx.non.ov <- which(rowSums(matrix) == 0)
order.rows.non.ov <- dist(avg.mean[idx.non.ov,], method = "euclidean") %>% hclust(method="ward") # cluster rows based on atac-seq
# Final ordering
order.heatmap <- c(idx.ov[order.rows.ov$order],idx.non.ov[order.rows.non.ov$order])
All heatmaps will have the samples sorted based on the PAM 50 classification in the following order: LumA, LumB, Basal, her2, Normal, Unknown.
library(ComplexHeatmap)
library(circlize)
# ATAC-seq data samples order
colData(atac.se)$BRCA_pam50[colData(atac.se)$Group == "8762FC8D-91FC-42B8-B74D-141298970EFC"] <- "LumB"
colData(atac.se)$BRCA_pam50[colData(atac.se)$Group == "A6462625-3970-44A4-B4CB-7B4AE86648CD"] <- "Normal"
colData(atac.se)$BRCA_pam50[colData(atac.se)$Group == "BE061C50-495F-46AD-BF95-09C8AD8B5365"] <- "LumB"
atac.samples.groups <- colData(atac.se)$BRCA_pam50[match(colnames(atac.values),colData(atac.se)$submitter_id)]
col.order.atac.val <- c( which(atac.samples.groups == "LumA"),
which(atac.samples.groups == "LumB"),
which(atac.samples.groups == "Basal"),
which(atac.samples.groups == "Her2"),
which(atac.samples.groups == "Normal"),
which(atac.samples.groups == "Unknown")
)
# Overlapping matrix samples order
df.annotation <- data.frame("Analysis" = stringr::str_trim(as.character(
sapply(colnames(matrix), FUN = function(x) {
unlist(strsplit(x,"\\("))[[1]]}
))),
"Comparison" = stringr::str_trim(as.character(
gsub("Normal","Normal-like",gsub(")","",sapply(colnames(matrix), FUN = function(x) {
unlist(strsplit(x,"\\("))[[2]]}
)))))
)
col.order.atac <- c(which(df.annotation$Analysis == "Luminal"),
which(df.annotation$Analysis == "Basal"),
which(df.annotation$Analysis == "Her2"),
which(df.annotation$Analysis == "Normal"),
which(is.na(df.annotation$Analysis))
)
# DNA methylation and gene expression matrix samples order
col.order.met <- c( which(mae$PAM50 == "LumA"),
which(mae$PAM50 == "LumB"),
which(mae$PAM50 == "Basal"),
which(mae$PAM50 == "Her2"),
which(mae$PAM50 == "Normal"),
which(is.na(mae$PAM50))
)
ELMER was able to identify regions hypomethylated in a particular molecular subytpes and its linkage gene is higher expressed if compared to the other molecular subtypes. Also, the ATAC-seq signal for those hypomethylated regions are expected to have a higher signal.
The first heatmap summarizes which of the ATAC-seq linkages overlapping distal probes (n = 1328) was identified by an ELMER analysis. Each column represents one analysis which are grouped by colors. For example, the green group are all active linkages found for Luminal samples in the following analysis: Luminal vs Basal, Luminal vs Her2, Luminal vs Normal-like.
The second heatmap shows the ATAC-seq Z-score of the mean log2 (normalized signal), in which redder the color is, higher is the signal (more accessible region).
The third heatmap shows the DNA methylation beta values, in which the bluer is the signal, lower is the methylation level (more accessible region).
The fourth heatmap shows the RNA-seq Z-scores, in which the yellower is the signal, higher is the expression.
# Overlapping matrix annotation
hb = HeatmapAnnotation(df = data.frame("Analysis"=df.annotation$Analysis),
col = list("Analysis" = c("Luminal" = "#208A42",
"Basal"="#272E6A",
"Her2"="#89288F",
"Normal"="#F47D2B",
"NA"="#AAAAAA")),
show_annotation_name = FALSE,
show_legend = TRUE,
annotation_name_side = "left",
annotation_legend_param = list(name = "Hypomethylated in:",
title = "Hypomethylated in:",
labels = c("Luminal","Basal","Her2","Normal-like","Unknown"),
at = c("Luminal","Basal","Her2","Normal","NA"),
labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
annotation_name_gp = gpar(fontsize = 12))
x <- data.frame("links"= rep(0,1328))
x[grep("peak_id_chr14:37588499-37589000_FOXA1",rownames(avg.mean)),"links"] <- 1
x[grep("peak_id_chr14:37591539-37592040_FOXA1",rownames(avg.mean)),"links"] <- 2
x[grep("peak_id_chr6:1575027-1575528_FOXC1",rownames(avg.mean)),"links"] <- 3
x[grep("peak_id_chr6:1614641-1615142_FOXC1",rownames(avg.mean)),"links"] <- 4
rownames(x)[grep("peak_id_chr14:37588499-37589000_FOXA1",rownames(avg.mean))] <- "cg27143688-FOXA1"
rownames(x)[grep("peak_id_chr6:1575027-1575528_FOXC1",rownames(avg.mean))] <- "cg05392244-FOXC1"
rownames(x)[grep("peak_id_chr6:1614641-1615142_FOXC1",rownames(avg.mean))] <- "cg25036779-FOXC1"
rownames(x)[grep("peak_id_chr14:37591539-37592040_FOXA1",rownames(avg.mean))] <- "cg04932551-FOXA1"
ht_list <-
Heatmap(x,
name = "Overlapping regions",
heatmap_legend_param = list(legend_direction = "horizontal",labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
width = unit(0.5, "cm"),row_names_side = "left",
row_order = order.heatmap,
column_title = "", cluster_rows = F,
show_column_names = F
)
# Overlapping matrix
ht_list <-
Heatmap(matrix,
name = "Overlap",
col = colorRamp2(c(0, 1), c("#ffffff", "#49006a")),
column_names_gp = gpar(fontsize = 12),
show_column_names = FALSE,
heatmap_legend_param = list(color_bar = "discrete",labels = c("Yes","No"),at = c(1, 0),
labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
use_raster = TRUE,
raster_device = c("png"),
raster_quality = 2,
show_row_names = FALSE,
show_heatmap_legend = FALSE,
cluster_columns = FALSE,
cluster_rows = F,
top_annotation = hb,
# bottom_annotation = hd,
column_order = col.order.atac,
row_order = order.heatmap,
#top_annotation_height = unit(8, "mm"),
width = unit(3, "cm"),
row_names_gp = gpar(fontsize = 4),
column_title = paste0("ELMER and \nATAC-seq links \n (# overlaps: ", sum(rowSums(matrix) > 0),")"),
row_title = "ATAC-seq pairs overlapping \n distal probes (n = 1328)",
column_title_gp = gpar(fontsize = 12),
row_title_gp = gpar(fontsize = 16))
ht_list <- ht_list +
Heatmap(-log10(atac_peak_linkage.gr.ov.450k$padj),
name = "ATAC-seq padj (-log10)",
heatmap_legend_param = list(legend_direction = "horizontal",labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
width = unit(0.5, "cm"),
row_order = order.heatmap,
column_title = "",
show_column_names = F
)
# ATAC-seq annotation
hc = HeatmapAnnotation(df = data.frame("pam50" = atac.samples.groups),
col = list("pam50" = c("LumA" = "#208A42",
"LumB"="#D51F26",
"Basal"="#272E6A",
"Her2"="#89288F",
"Normal"="#F47D2B",
"Unknown"="#AAAAAA")),
show_annotation_name = FALSE,
annotation_name_side = "left",
annotation_legend_param = list(labels = c("LumA","LumB","Basal","Her2","Normal-like","Unknown"),
at = c("LumA","LumB","Basal","Her2","Normal","Unknown"),
labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
annotation_name_gp = gpar(fontsize = 6))
# ATAC-seq matrix
ht_list <- ht_list +
Heatmap(t(scale (t(atac.values))),
name = "ATAC-seq (z-score)",
col =colorRamp2(seq(-2, 2, by=4/99), pal_atac),
heatmap_legend_param = list(legend_direction = "horizontal",
labels_gp = gpar(fontsize = 12),
title_gp = gpar(fontsize = 12)),
column_names_gp = gpar(fontsize = 8),
show_column_names = F,
show_row_names = F,
cluster_columns = FALSE,
use_raster = TRUE,
raster_device = c("png"),
raster_quality = 2,
row_order = order.heatmap,
column_order = col.order.atac.val,
cluster_rows = F,
row_names_gp = gpar(fontsize = 12),
top_annotation = hc,
width = unit(10, "cm"),
column_title = "ATAC-seq z-score (n = 74)",
column_title_gp = gpar(fontsize = 12),
row_title_gp = gpar(fontsize = 16))
# DNA methylation and gene expression annotation
ha = HeatmapAnnotation(df = colData(mae)[,c("PAM50"),drop = F],
col = list("PAM50" = c("LumA" = "#208A42",
"LumB"="#D51F26",
"Basal"="#272E6A",
"Her2"="#89288F",
"Normal"="#F47D2B",
"NA"="#AAAAAA")),
show_annotation_name = FALSE,
show_legend = FALSE,
annotation_legend_param = list(labels = c("LumA","LumB","Basal","Her2","Normal-like","Unknown"),
at = c("LumA","LumB","Basal","Her2","Normal","NA"),
labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
annotation_name_side = "left",
annotation_name_gp = gpar(fontsize = 6))
# DNA methylation matrix
ht_list <- ht_list +
Heatmap(avg.mean,
name = "Methylation Beta-value",
col = colorRamp2(seq(0, 1, by=1/99), pal_methylation),
column_names_gp = gpar(fontsize = 8),
heatmap_legend_param = list(legend_direction = "horizontal",labels_gp = gpar(fontsize = 12), title_gp = gpar(fontsize = 12)),
show_column_names = F,
show_row_names = F,
use_raster = TRUE,
raster_device = c("png"),
raster_quality = 2,
cluster_columns = FALSE,
row_order = order.heatmap,
cluster_rows = F,
column_order = col.order.met,
row_names_gp = gpar(fontsize = 12),
top_annotation = ha,
width = unit(10, "cm"),
column_title = "DNA methylation (n = 858)",
column_title_gp = gpar(fontsize = 12),
row_title_gp = gpar(fontsize = 16))
# RNA-seq matrix
ht_list <- ht_list +
Heatmap(t(scale (t(expression.values))),
name = "RNA-seq (z-score)",
col = colorRamp2(seq(-2, 2, by=4/99), pal_rna),
column_names_gp = gpar(fontsize = 8),
show_column_names = F,
heatmap_legend_param = list(legend_direction = "horizontal",
labels_gp = gpar(fontsize = 12),
title_gp = gpar(fontsize = 12)),
show_row_names = F,
cluster_columns = FALSE,
row_order = order.heatmap,
use_raster = TRUE,
raster_device = c("png"),
raster_quality = 2,
cluster_rows = F,
column_order = col.order.met,
row_names_gp = gpar(fontsize = 12),
top_annotation = ha,
width = unit(10, "cm"),
column_title = "RNA-seq z-score (n = 858)",
column_title_gp = gpar(fontsize = 12),
row_title_gp = gpar(fontsize = 16))
#pdf("Heamtap_A_small.pdf", width = 20, height = 8)
draw(ht_list,newpage = TRUE,
column_title = "ATAC-seq pairs overlapping BRCA ELMER anti-correlated paired gene-probes",
column_title_gp = gpar(fontsize = 24, fontface = "bold"),
heatmap_legend_side = "bottom",
annotation_legend_side = "right")
# Get TPM values
load(file = "~/paper_elmer/atac-seq/BRCA-atac-seq/fox.exp.brca.tpm.rda")
simpleCap <- function(x) {
if(is.na(x)) return("NA")
s <- x
paste(toupper(substring(s, first = 1, last = 1)), tolower(substring(s, 2)),
sep = "", collapse = " ")
}
probe <- "cg27143688"
symbol <- "FOXA1"
category = "PAM50"
legend.title <- simpleCap(category)
samples <- sampleMap(mae)[sampleMap(mae)$assay == "DNA methylation","primary"]
category <- colData(mae)[samples,category]
category[grep("Normal",category)] <- "Normal-like"
category[is.na(category)] <- "Unknown"
rownames(fox.exp.brca) <- fox.exp.brca$X1
p.a <- scatter(meth = assay(getMet(mae)[probe,]),
exp = t(log2(fox.exp.brca[,2] + 1)),
category = category,
ylim = c(0,10),
legend.title = legend.title,
correlation = TRUE,
xlab = expression(paste("DNA methylation ",beta," value")),
ylab = expression("RNA-seq - log"[2]*"(TPM + 1)"),
title = paste0("Basal-specific Silencing\n",
symbol," - ",probe,"\n",
paste0("chr",paste(probes.metadata[probes.metadata$Probe == probe,c("seqnames","start")],collapse = ":"))
),
lm_line=TRUE) +
scale_color_manual(breaks = c("Basal", "Her2", "LumA","LumB","Normal-like","Unknown"),
values=c("#272E6A", "#89288F", "#208A42","#D51F26","#F47D2B","#AAAAAA")) +
labs(colour = "PAM50") +
theme(
title = element_text(size = 10),
panel.background = element_blank(),
panel.grid.major = element_blank(),
axis.text = element_text(colour="black"),
panel.grid.minor = element_blank(),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(angle = 0,hjust = 1,size = 10)
)
probe <- "cg04932551"
symbol <- "FOXA1"
category = "PAM50"
legend.title <-simpleCap(category)
samples <- sampleMap(mae)[sampleMap(mae)$assay == "DNA methylation","primary"]
category <- colData(mae)[samples,category]
category[grep("Normal",category)] <- "Normal-like"
category[is.na(category)] <- "Unknown"
rownames(fox.exp.brca) <- fox.exp.brca$X1
p.b <- scatter(meth = assay(getMet(mae)[probe,]),
exp = t(log2(fox.exp.brca[,2] + 1)),
category = category,
ylim = c(0,10),
legend.title = legend.title,
correlation = TRUE,
xlab = expression(paste("DNA methylation ",beta," value")),
ylab = "",
title = paste0("Basal-specific Silencing\n",
symbol," - ",probe,"\n",
paste0("chr",paste(probes.metadata[probes.metadata$Probe == probe,c("seqnames","start")],collapse = ":"))
),
lm_line=TRUE) +
scale_color_manual(breaks = c("Basal", "Her2", "Luma","Lumb","Normal-like","Unknown"),
values = c("#272E6A", "#89288F", "#208A42","#D51F26","#F47D2B","#AAAAAA")) +
theme(
title = element_text(size = 10),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(colour="black"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(angle = 0,hjust = 1,size = 10)
)
probe <- "cg05392244"
symbol <- "FOXC1"
category = "PAM50"
legend.title <-simpleCap(category)
samples <- sampleMap(mae)[sampleMap(mae)$assay == "DNA methylation","primary"]
category <- colData(mae)[samples,category]
rownames(fox.exp.brca) <- fox.exp.brca$X1
category[grep("Normal",category)] <- "Normal-like"
category[is.na(category)] <- "Unknown"
p.c <- scatter(meth = assay(getMet(mae)[probe,]),
exp = t(log2(fox.exp.brca[,3] + 1)),
category = category,
ylim = c(0,10),
legend.title = legend.title,
correlation = TRUE,
xlab = expression(paste("DNA methylation ",beta," value")),
ylab = expression("RNA-seq - log"[2]*"(TPM + 1)"),
title = paste0("Basal-specific Activation\n",
symbol," - ",probe,"\n",
paste0("chr",paste(probes.metadata[probes.metadata$Probe == probe,c("seqnames","start")],collapse = ":"))
),
lm_line=TRUE) +
scale_color_manual(breaks = c("Basal", "Her2", "Luma","Lumb","Normal-like","Unknown"),
values=c("#272E6A", "#89288F", "#208A42","#D51F26","#F47D2B","#AAAAAA")) +
theme(
title = element_text(size = 10),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(colour="black"),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(angle = 0,hjust = 1,size = 10)
)
probe <- "cg25036779"
symbol <- "FOXC1"
category = "PAM50"
legend.title <-simpleCap(category)
samples <- sampleMap(mae)[sampleMap(mae)$assay == "DNA methylation","primary"]
category <- colData(mae)[samples,category]
category[grep("Normal",category)] <- "Normal-like"
category[is.na(category)] <- "Unknown"
rownames(fox.exp.brca) <- fox.exp.brca$X1
p.d <- scatter(meth = assay(getMet(mae)[probe,]),
exp = t(log2(fox.exp.brca[,3] + 1)),
category = category,
ylim = c(0,10),
legend.title = legend.title,
correlation = TRUE,
xlab = expression(paste("DNA methylation ",beta," value")),
ylab = "",
title = paste0("Basal-specific Activation\n",
symbol," - ",probe,"\n",
paste0("chr",paste(probes.metadata[probes.metadata$Probe == probe,c("seqnames","start")],collapse = ":"))
),
lm_line=TRUE) +
scale_color_manual(breaks = c("Basal", "Her2", "Luma","Lumb","Normal-like","Unknown"),
values=c("#272E6A", "#89288F", "#208A42","#D51F26","#F47D2B","#AAAAAA")) +
theme(
title = element_text(size = 10),
panel.background = element_blank(),
panel.grid.major = element_blank(),
axis.text = element_text(colour="black"),
panel.grid.minor = element_blank(),
axis.text.y = element_text(size = 10),
axis.text.x = element_text(angle = 0,hjust = 1,size = 10)
)
library(ggpubr)
ggarrange(p.a,p.b,p.c,p.d,
#egend = "none",
common.legend = TRUE,
labels = c("B", "", "C",""),
ncol = 2, nrow = 2)
The R/Bioconductor package ELMER (Enhancer Linking by Methylation/Expression Relationships) provides a systematic approach that reconstructs altered gene regulatory networks (GRNs) by combining enhancer methylation and gene expression data derived from the same sample set (Chedraoui Silva et al. 2018).
HM450 TCGA methylation data was processed with SeSAMe (https://doi.org/10.1093/nar/gky691), and matched to RNA-seq (FPKM-UQ) for 858 Breast Cancer samples. ELMER (Chedraoui Silva et al. 2018) supervised analysis was used to perform pairwise analysis of each PAM50 (Berger et al. 2018) subtype against all other subtypes, using only the default “distal” probe filter, which uses only \(160,944\) probes that are \(>2kb\) from a GENCODE 28 transcription start site. We used the following cutoffs for ELMER analysis: \(get.diff.meth(sig.diff) = 0.15\), \(get.diff.meth(pvalue) = 10^{-4}\), \(get.pair(Pe) = 10^{-9}\), \(get.pair(raw.pvalue = 10^{-9})\). ELMER version 2.5.4 (Chedraoui Silva et al. 2018) was used for all analyses.
The tables below show the analysis performed and the number of samples in each group. It is important to note that for a Basal vs Luminal analysis, we search for active regulatory networks in each subtype compared to the other.
The main ELMER arguments used in the analysis are shown below.
library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=--=-=-=-=-=-=-=-
# 1) Download TCGA data (DNA methylation, gene expression and clinical information)
# into a MultiAssayExperiment object
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=--=-=-=-=-=-=-=-
mae.file <- c("~/paper_elmer/atac-seq/BRCA-atac-seq/mae_BRCA_hg38_450K_no_ffpe_sesame_fixed_annotation_no_filter.rda")
if(file.exists(mae.file)) {
mae <- get(load(mae.file))
} else {
getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
basedir = "DATA", # Where data will be downloaded
genome = "hg38", # Genome of refenrece "hg38" or "hg19"
Meth = FALSE)
distal.probes <- get.feature.probe(feature = NULL,
genome = "hg38",
met.platform = "450K")
# The DNA methylation array (450K) IDATs were processed with SESAME (https://doi.org/10.1093/nar/gky691)
mae <- createMAE(exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda", # FPKM-UQ
met = "/hdd/DNA_met_sesame/BRCA.rda",
met.platform = "450K",
genome = "hg38",
linearize.exp = TRUE, # log2(FPKM-UQ + 1)
filter.probes = distal.probes,
met.na.cut = 1, # We will keep all probes, ELMER can deal with it
save = FALSE,
TCGA = TRUE)
# Remove FFPE samples from the analysis
mae <- mae[,!mae$is_ffpe]
# Add most updated TCGA-BRCA molecular subtype
# The classification is found at https://doi.org/10.1016/j.ccell.2018.03.014 table mmc4.xlsx
subtypes <- readr::read_tsv("~/paper_elmer/BRCA.1218_pam50scores.FINAL.txt")[,c(1,2,9)]
subtypes$sample <- substr(subtypes$Barcode, 1, 16)
meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
meta.data <- S4Vectors::DataFrame(meta.data)
rownames(meta.data) <- meta.data$sample
stopifnot(all(meta.data$patient == colData(mae)$patient))
colData(mae) <- meta.data
# In our analysis LuminalB and Luminal A will be merged in one single Luminal group
mae$PAM50_merged <- gsub("LumB|LumA","Luminal",mae$Call)
save(mae, file = mae.file)
}
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=--=-=-=-=-=-=-=-=-=-=-=-
# 2) ELMER analysis
# Main arguments:
# - mode: supervised
# - Mean diff. meth.: 0.1
# - FDR diff. meth.: 0.05
# - FDR gene-expression - meth levels.: 0.005
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=--=-=-=-=-=-=-=-=-=-=-=-
cores <- 4
direction <- c( "hypo","hyper")
genome <- "hg38"
group.col <- "PAM50_merged"
groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
for(g in 1:nrow(groups)) {
group1 <- groups[g,1]
group2 <- groups[g,2]
for (j in direction){
tryCatch({
message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
dir.out <- paste0("BRCA_supervised_",genome,"_sesame_metdiff_015_fdr_n4_pairfdr_n9/",
group1,"_",group2,"/",j)
if(file.exists(paste0(dir.out,"/getTF.",j,".TFs.with.motif.pvalue.rda"))) next
dir.create(dir.out, recursive = TRUE)
#--------------------------------------
# STEP 3: Analysis |
#--------------------------------------
# Step 3.1: Get diff methylated probes |
#--------------------------------------
Sig.probes <- get.diff.meth(data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
sig.dif = 0.15,
mode = "supervised",
cores = cores,
dir.out = dir.out,
diff.dir = j,
pvalue = 10^-4)
if(nrow(Sig.probes) == 0) next
#-------------------------------------------------------------
# Step 3.2: Identify significant probe-gene pairs |
#-------------------------------------------------------------
# Collect nearby 20 genes for Sig.probes
nearGenes <- GetNearGenes(data = mae,
probe = Sig.probes$probe,
cores = cores)
pair <- get.pair(data = mae,
nearGenes = nearGenes,
group.col = group.col,
group1 = group1,
group2 = group2,
permu.dir = paste0(dir.out,"/permu"),
dir.out = dir.out,
mode = "supervised",
diff.dir = j,
filter.probes = FALSE,
cores = cores,
label = j,
Pe = 10^-9, # FDR correction cut-off
raw.pvalue = 10^-9)
})
}
}
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/atlas-base/atlas/libblas.so.3.0
## LAPACK: /usr/lib/atlas-base/atlas/liblapack.so.3.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] ggpubr_0.1.7 magrittr_1.5
## [3] circlize_0.4.4 ComplexHeatmap_1.17.1
## [5] ggthemes_4.0.0 bindrcpp_0.2.2
## [7] iheatmapr_0.4.4 knitr_1.20
## [9] MultiAssayExperiment_1.4.9 SummarizedExperiment_1.8.1
## [11] DelayedArray_0.4.1 matrixStats_0.54.0
## [13] Biobase_2.38.0 dplyr_0.7.6
## [15] plotly_4.8.0 ggplot2_3.0.0
## [17] ChIPseeker_1.14.2 GenomicRanges_1.30.3
## [19] GenomeInfoDb_1.14.0 IRanges_2.12.0
## [21] S4Vectors_0.16.0 BiocGenerics_0.24.0
## [23] plyr_1.8.4 ELMER_2.5.4
## [25] ELMER.data_2.5.4 readr_1.1.1
##
## loaded via a namespace (and not attached):
## [1] rtracklayer_1.38.3
## [2] R.methodsS3_1.7.1
## [3] tidyr_0.8.1
## [4] acepack_1.4.1
## [5] bit64_0.9-7
## [6] aroma.light_3.8.0
## [7] R.utils_2.6.0
## [8] data.table_1.11.4
## [9] rpart_4.1-13
## [10] hwriter_1.3.2
## [11] RCurl_1.95-4.11
## [12] AnnotationFilter_1.2.0
## [13] doParallel_1.0.11
## [14] GenomicFeatures_1.30.3
## [15] cowplot_0.9.3
## [16] RSQLite_2.1.1
## [17] commonmark_1.5
## [18] bit_1.1-14
## [19] xml2_1.2.0
## [20] httpuv_1.4.5
## [21] assertthat_0.2.0
## [22] hms_0.4.2
## [23] evaluate_0.11
## [24] promises_1.0.1
## [25] BiocInstaller_1.28.0
## [26] fansi_0.2.3
## [27] progress_1.2.0
## [28] caTools_1.17.1.1
## [29] km.ci_0.5-2
## [30] igraph_1.2.1
## [31] DBI_1.0.0
## [32] geneplotter_1.56.0
## [33] htmlwidgets_1.2
## [34] reshape_0.8.7
## [35] EDASeq_2.12.0
## [36] matlab_1.0.2
## [37] purrr_0.2.5
## [38] crosstalk_1.0.0
## [39] selectr_0.4-1
## [40] backports_1.1.2
## [41] permute_0.9-4
## [42] annotate_1.56.2
## [43] gridBase_0.4-7
## [44] biomaRt_2.34.2
## [45] ensembldb_2.2.2
## [46] withr_2.1.2
## [47] Gviz_1.22.3
## [48] BSgenome_1.46.0
## [49] checkmate_1.8.5
## [50] vegan_2.5-2
## [51] GenomicAlignments_1.14.2
## [52] prettyunits_1.0.2
## [53] cluster_2.0.7-1
## [54] DOSE_3.4.0
## [55] lazyeval_0.2.1
## [56] crayon_1.3.4
## [57] genefilter_1.60.0
## [58] labeling_0.3
## [59] edgeR_3.20.9
## [60] pkgconfig_2.0.1
## [61] nlme_3.1-137
## [62] ProtGenerics_1.10.0
## [63] nnet_7.3-12
## [64] devtools_1.13.6
## [65] bindr_0.1.1
## [66] rlang_0.2.1
## [67] downloader_0.4
## [68] AnnotationHub_2.10.1
## [69] dichromat_2.0-0
## [70] rprojroot_1.3-2
## [71] Matrix_1.2-14
## [72] KMsurv_0.1-5
## [73] boot_1.3-20
## [74] zoo_1.8-3
## [75] base64enc_0.1-3
## [76] GlobalOptions_0.1.0
## [77] png_0.1-7
## [78] viridisLite_0.3.0
## [79] rjson_0.2.20
## [80] bitops_1.0-6
## [81] shinydashboard_0.7.0
## [82] R.oo_1.22.0
## [83] ConsensusClusterPlus_1.42.0
## [84] KernSmooth_2.23-15
## [85] Biostrings_2.46.0
## [86] blob_1.1.1
## [87] TCGAbiolinks_2.9.2
## [88] shape_1.4.4
## [89] stringr_1.3.1
## [90] qvalue_2.10.0
## [91] ShortRead_1.36.1
## [92] scales_0.5.0
## [93] memoise_1.1.0
## [94] gplots_3.0.1
## [95] gdata_2.18.0
## [96] zlibbioc_1.24.0
## [97] compiler_3.4.4
## [98] RColorBrewer_1.1-2
## [99] plotrix_3.7-2
## [100] Rsamtools_1.30.0
## [101] cli_1.0.0
## [102] XVector_0.18.0
## [103] htmlTable_1.12
## [104] Formula_1.2-3
## [105] MASS_7.3-50
## [106] mgcv_1.8-24
## [107] tidyselect_0.2.4
## [108] stringi_1.2.4
## [109] yaml_2.2.0
## [110] GOSemSim_2.4.1
## [111] locfit_1.5-9.1
## [112] latticeExtra_0.6-28
## [113] ggrepel_0.8.0
## [114] survMisc_0.5.5
## [115] VariantAnnotation_1.24.5
## [116] fastmatch_1.1-0
## [117] tools_3.4.4
## [118] rstudioapi_0.7
## [119] foreach_1.4.4
## [120] foreign_0.8-71
## [121] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [122] gridExtra_2.3
## [123] digest_0.6.15
## [124] rvcheck_0.1.0
## [125] shiny_1.1.0
## [126] cmprsk_2.2-7
## [127] Rcpp_0.12.18
## [128] broom_0.5.0
## [129] later_0.7.3
## [130] httr_1.3.1
## [131] survminer_0.4.2
## [132] ggdendro_0.1-20
## [133] AnnotationDbi_1.40.0
## [134] biovizBase_1.26.0
## [135] colorspace_1.3-2
## [136] rvest_0.3.2
## [137] XML_3.98-1.12
## [138] splines_3.4.4
## [139] xtable_1.8-2
## [140] jsonlite_1.5
## [141] UpSetR_1.3.3
## [142] testthat_2.0.0
## [143] R6_2.2.2
## [144] Hmisc_4.1-1
## [145] pillar_1.3.0
## [146] htmltools_0.3.6
## [147] mime_0.5
## [148] DT_0.4
## [149] glue_1.3.0
## [150] BiocParallel_1.12.0
## [151] RMySQL_0.10.15
## [152] DESeq_1.30.0
## [153] interactiveDisplayBase_1.16.0
## [154] codetools_0.2-15
## [155] fgsea_1.4.1
## [156] utf8_1.1.4
## [157] lattice_0.20-35
## [158] tibble_1.4.2
## [159] sva_3.26.0
## [160] curl_3.2
## [161] gtools_3.8.1
## [162] GO.db_3.5.0
## [163] survival_2.42-6
## [164] limma_3.34.9
## [165] roxygen2_6.0.1
## [166] rmarkdown_1.10
## [167] munsell_0.5.0
## [168] DO.db_2.9
## [169] GetoptLong_0.1.7
## [170] fastcluster_1.1.25
## [171] GenomeInfoDbData_1.0.0
## [172] iterators_1.0.10
## [173] reshape2_1.4.3
## [174] gtable_0.2.0
Berger, Ashton C, Anil Korkut, Rupa S Kanchi, Apurva M Hegde, Walter Lenoir, Wenbin Liu, Yuexin Liu, et al. 2018. “A Comprehensive Pan-Cancer Molecular Study of Gynecologic and Breast Cancers.” Cancer Cell 33 (4). Elsevier:690–705.
Chedraoui Silva, Tiago, Simon G. Coetzee, Lijing Yao, Nicole Gull, Dennis J. Hazelett, Houtan Noushmehr, De-Chen Lin, and Benjamin P. Berman. 2018. “ELMER V.2: An R/Bioconductor Package to Reconstruct Gene Regulatory Networks from Dna Methylation and Transcriptome Profiles.” bioRxiv. Cold Spring Harbor Laboratory. https://doi.org/10.1101/148726.