library(readr) library(dplyr) library(ggplot2) library(tidyverse) library(DESeq2) mycounts <- read.csv("C:/Users/~/Desktop/datacounts.csv") metadata <- read.csv("C:/Users/~/Desktop/datametadata.csv") countData <- mycounts[!(duplicated(mycounts[[1]]) | duplicated(mycounts[[1]], fromLast=TRUE)), ] rownames(mycounts) = make.names(mycounts$gene, unique=TRUE) mycounts <- as.data.frame(mycounts) metadata <- as.data.frame(metadata) head(mycounts) head(metadata) class(mycounts) class(metadata) dds <- DESeqDataSetFromMatrix(countData=mycounts, colData=metadata, design=~1, tidy=TRUE) dds sizeFactors(dds) dispersions(dds) dds <- DESeq(dds) res = results(dds, contrast=c("Group", "CNL", "Target"), tidy=TRUE) res <- as_tibble(res) res res %>% filter(padj<0.05) %>% write_csv("sigresults.csv") #smallestGroupSize <- 3 #keep <- rowSums(counts(dds) >= 10) >= smallestGroupSize #dds <- dds[keep,] vsdata <- vst(dds, blind=FALSE) plotPCA(vsdata, intgroup="Group") + geom_label(aes(label = colnames(vsdata))) + labs(title=" ") df <- as.data.frame(colData(vsdata)[,c("Group","R","NR")]) H <- c("A","B","C")]) mat <- assay(vsdata)[ H, ] mat <- as.data.frame(mat) mat1 <- mat %>% arrange(desc(TD)) pheatmap(mat1, annotation_col=df, cluster_rows=F, cluster_cols=F,main = "") plotCounts(dds, gene="MYC", intgroup="Group", returnData=TRUE) %>% ggplot(aes(Group, count)) + geom_boxplot(aes(fill=Group)) + scale_y_log10() + ggtitle("MYC") vsdata <- varianceStabilizingTransformation(dds) (less row) Volcanoplot p <- ggplot(data=res, aes(x=log2FoldChange, y=-log10(pvalue))) + geom_point() # Add more simple "theme" p <- ggplot(data=res, aes(x=log2FoldChange, y=-log10(pvalue))) + geom_point() + theme_minimal() # Add vertical lines for log2FoldChange thresholds, and one horizontal line for the p-value threshold p2 <- p + geom_vline(xintercept=c(-0.6, 0.6), col="red") + geom_hline(yintercept=-log10(0.05), col="red") # The significantly differentially expressed genes are the ones found in the upper-left and upper-right corners. # Add a column to the data frame to specify if they are UP- or DOWN- regulated (log2FoldChange respectively positive or negative) # add a column of NAs res$diffexpressed <- "NO" # if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP" res$diffexpressed[res$log2FoldChange > 0.6 & res$pvalue < 0.05] <- "UP" # if log2Foldchange < -0.6 and pvalue < 0.05, set as "DOWN" res$diffexpressed[res$log2FoldChange < -0.6 & res$pvalue < 0.05] <- "DOWN" # Re-plot but this time color the points with "diffexpressed" p <- ggplot(data=res, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed)) + geom_point() + theme_minimal() # Add lines as before... p2 <- p + geom_vline(xintercept=c(-0.6, 0.6), col="red") + geom_hline(yintercept=-log10(0.05), col="red") ## Change point color # 1. by resfault, it is assigned to the categories in an alphabetical orresr): p3 <- p2 + scale_color_manual(values=c("blue", "black", "red")) # 2. to automate a bit: ceate a named vector: the values are the colors to be used, the names are the categories they will be assigned to: mycolors <- c("blue", "red", "black") names(mycolors) <- c("DOWN", "UP", "NO") p3 <- p2 + scale_colour_manual(values = mycolors) # Now write down the name of genes besires the points... # Create a new column "reslabel" to res, that will contain the name of genes differentially expressed (NA in case they are not) res$delabel <- NA res$delabel[res$diffexpressed != "NO"] <- res$row[res$diffexpressed != "NO"] ggplot(data=res, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) + geom_point() + theme_minimal() + geom_text() library(ggrepel) # plot adding up all layers we have seen so far ggplot(data=res, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) + geom_point() + theme_minimal() + geom_text_repel() + scale_color_manual(values=c("blue", "black", "red")) + geom_vline(xintercept=c(-0.6, 0.6), col="red") + geom_hline(yintercept=-log10(0.05), col="red") plotCounts(dds, gene="FSTL5", intgroup="Treatment", returnData=TRUE) %>% ggplot(aes(Treatment, count)) + geom_boxplot(aes(fill=Treatment)) + scale_y_log10() + ggtitle("FSTL5") P + scale_x_discrete(name ="Group", limits=c("CNL","Target")) library("genefilter") topVarGenes <- head(order(-rowVars(assay(rld))),30) The heatmap becomes more interesting if we do not look at absolute expression strength but rather at the amount by which each gene deviates in a specific sample from the gene’s average across all samples. Hence, we center each genes’ values across samples, and plot a heatmap. We provide the column side colors to help identify the treated samples (in blue) from the untreated samples (in grey). vsdata <- vst(dds, blind=FALSE) plotPCA(vsdata, intgroup="Group") plotPCA(vsdata, intgroup="Group") + geom_label(aes(label = colnames(vsdata))) topVarGenes <- head(order(-rowVars(assay(vsdata))),50) mat <- assay(vsdata)[ topVarGenes, ] mat <- mat - rowMeans(mat) df <- as.data.frame(colData(vsdata)[,c("ID","R","D","M","Treatment")]) pheatmap(mat, annotation_col=df, cluster_rows=FALSE, cluster_cols=FALSE) res1 <- res %>% filter(padj<0.05) %>% arrange(padj) res2 <- res1[1:100,] %>% arrange(desc(log2FoldChange)) Sig <- res2$row mat <- assay(vsdata)[ Sig, ] mat <- mat - rowMeans(mat) df <- as.data.frame(colData(vsdata)[,c("ID","Treatment","HPV","PIK3CAmutation","Response.Primary","Responder")]) df <- as.data.frame(colData(vsdata)[,c("ID","Group")]) pheatmap(mat, annotation_col=df, cluster_rows=F, cluster_cols=F) mat <- assay(vsdata)[ G, ] mat1 <- as.data.frame(mat) mat1 <- mat1 %>% arrange(desc(TD3)) pheatmap(mat1, annotation_col=df, cluster_rows=F, cluster_cols=F) zmat <- scale(tmat1, center = TRUE, scale = TRUE) upon column sacPlsda <- opls(vsdata, "Treatment", orthoI = 1) PLS-DA GSEA library(org.Hs.eg.db) ens2symbol <- AnnotationDbi::select(org.Hs.eg.db, key=res$row, columns="SYMBOL", keytype="SYMBOL") ens2symbol <- as_tibble(ens2symbol) ens2symbol res2 <- res %>% dplyr::select(SYMBOL, stat) %>% na.omit() %>% distinct() %>% group_by(SYMBOL) %>% summarize(stat=mean(stat)) res2 library(fgsea) ranks <- deframe(res2) head(ranks, 20) pathways.hallmark <- gmtPathways("C:/Users/~/Desktop/GSEA dataset/ h.all.v2023.1.Hs.symbols.gmt") fgseaRes <- fgsea(pathways=pathways.hallmark, stats=ranks, nperm=1000) fgseaRes <- fgsea(pathways=pathways.hallmark, stats=ranks, eps = 0.0, minSize = 15, maxSize = 500) fgseaResTidy <- fgseaRes %>% as_tibble() %>% arrange(desc(NES)) # Show in a nice table: fgseaResTidy %>% dplyr::select(-leadingEdge, -ES, -nMoreExtreme) %>% arrange(padj) ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) + geom_col(aes(fill=padj<0.05)) + coord_flip() + labs(x="Pathway", y="Normalized Enrichment Score", title="Hallmark pathways NES from GSEA") + theme_minimal() plotEnrichment(pathways.hallmark[["HALLMARK_APOPTOSIS"]], ranks) + labs(title=" HALLMARK_APOPTOSIS") fgseaRes <- fgsea(pathways=pathways.hallmark, stats=ranks, nperm=1000) go <- fgsea(pathways=gmtPathways("C:/Users/~/Desktop/GSEA dataset/c5.all.v7.2.symbols.gmt"), ranks, nperm=1000) %>% as_tibble() go <- as.matrix(go) write.csv(go, "go.csv") | pathway | pval | padj | NES | size | 1 | HALLMARK_E2F_TARGETS | 0.003125 | 0.016 | 1.50255883270908 | 196 | 2 | HALLMARK_P53_PATHWAY | 0.00307692307692308 | 0.016 | 1.46976609146818 | 193 | ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) + geom_col(aes(fill=padj<0.05)) + coord_flip() + labs(x="Pathway", y="Normalized Enrichment Score", title="Hallmark pathways NES from GSEA") + theme_minimal() fgseaRes <- fgsea(pathways=pathways.hallmark, stats=ranks, nperm=1000) go <- fgsea(pathways=gmtPathways("C:/Users/~/Desktop/GSEA dataset/c5.all.v7.2.symbols.gmt"), ranks, nperm=1000) %>% as_tibble() Re <- fgsea(pathways=gmtPathways("C:/Users/~/Downloads/c2.cp.reactome.v2023.1.Hs.symbols.gmt"), ranks, nperm=1000) %>% as_tibble() go <- as.matrix(go) write.csv(go, "go.csv") Re <- as.matrix(Re) write.csv(Re, "Re.csv") hallmark <- fgsea(pathways=gmtPathways("C:/Users/~/Desktop/GSEA dataset/h.all.v7.2.symbols.gmt "), ranks, nperm=1000) %>% as_tibble() hallmark <- as.matrix(hallmark) write.csv(hallmark, "hallmark Bulk PRE HPV+ R vs NR.csv") plotEnrichment(pathways.hallmark[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]], ranks) + labs(title="HALLMARK_INTERFERON_GAMMA_RESPONSE") |