## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, fig.align='center', external=TRUE, echo=TRUE, warning=FALSE, comment = "#>" ) ## ----echo=FALSE--------------------------------------------------------------- cap01<-"Comprehensive diagram of complete mobileRNA workflows. " ## ----fig.align="centre", echo=FALSE, fig.cap=cap01---------------------------- knitr::include_graphics("../man/figures/mobileRNA_graphic_2.png") ## ----eval=FALSE, message=FALSE------------------------------------------------ # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("mobileRNA") ## ----eval=FALSE, message=FALSE------------------------------------------------ # if (!require("devtools")) install.packages("devtools") # devtools::install_github("KJeynesCupper/mobileRNA", ref = "main") ## ----message=FALSE------------------------------------------------------------ library("mobileRNA") ## ----Load, message=FALSE------------------------------------------------------ # Load small RNA data data("sRNA_data") # Load messenger RNA data data("mRNA_data") ## ----message=FALSE------------------------------------------------------------ fasta_1 <- system.file("extdata","reduced_chr12_Eggplant.fa.gz", package="mobileRNA") fasta_2 <-system.file("extdata","reduced_chr2_Tomato.fa.gz", package="mobileRNA") # define temporary output directory - replace with your directory output_assembly_file <- file.path(tempfile("merged_assembly", fileext = ".fa")) # merge merged_reference <- RNAmergeGenomes(genomeA = fasta_1, genomeB = fasta_2, output_file = output_assembly_file) ## ----message=FALSE------------------------------------------------------------ anno1 <- system.file("extdata","reduced_chr12_Eggplant.gff.gz", package="mobileRNA") anno2 <- system.file("extdata","reduced_chr2_Tomato.gff.gz", package="mobileRNA") # define temporary output directory output_annotation_file <- file.path(tempfile("merged_annotation", fileext = ".gff3")) # merge annotation files into a single file merged_annotation <- RNAmergeAnnotations(annotationA = anno1, annotationB = anno2, output_file = output_annotation_file) ## ----eval=FALSE--------------------------------------------------------------- # # directory containing only sRNAseq samples # samples <- system.file("extdata/sRNAseq",package="mobileRNA") # # # output location # output_location <- tempdir() # # mapRNA(input = "sRNA", # input_files_dir = samples, # output_dir = output_location, # genomefile = output_assembly_file, # condaenv = "/Users/user-name/miniconda3/envs/ShortStack4", # mmap = "n") ## ----eval=FALSE--------------------------------------------------------------- # # Directory containing results # results_dir <- file.path(output_location,"2_sRNA_results") # # # Sample names and total number of reads, in the same order. # sample_names <- c("selfgraft_demo_1", "selfgraft_demo_2", # "selfgraft_demo_3", "heterograft_demo_1", # "heterograft_demo_2", "heterograft_demo_3") # # # sRNA_data_demo <- RNAimport(input = "sRNA", # directory = results_dir, # samples = sample_names) ## ----------------------------------------------------------------------------- data("sRNA_data") ## ----message=FALSE------------------------------------------------------------ # define control samples controls <- c("selfgraft_1", "selfgraft_2", "selfgraft_3") mobile_sRNA <- RNAmobile(input = "sRNA", data = sRNA_data, controls = controls, genome.ID = "B", task = "keep") ## ----eval=FALSE--------------------------------------------------------------- # # save output as txt file # write.table(mobile_sRNA, "./sRNA_mobile_output.txt") ## ----echo=FALSE--------------------------------------------------------------- cap1 <- "An example facet line graph to show the distribution of sRNA classes within each sample." cap2 <- "An example facet bar graph to show the distribution of sRNA classes within each sample." ## ----------------------------------------------------------------------------- # plot each replicate as a line, separately, and facet sample_distribution_line <- RNAdistribution(sRNA_data, style = "line", overlap = FALSE, facet = TRUE, colour = "darkgreen") # plot each replicate as a bar, separately, and facet sample_distribution_bar <- RNAdistribution(sRNA_data, style = "bar", facet = TRUE, colour ="lightblue") ## ----fig.cap=cap1, fig.show="hold", fig.height=10, fig.width=15--------------- # View plot (only) sample_distribution_line$plot ## ----fig.cap=cap2, fig.show="hold", fig.height=10, fig.width=15--------------- # View plot (only) sample_distribution_bar$plot ## ----echo=FALSE--------------------------------------------------------------- cap4 <-"An example of a PCA, illustrating the sRNA data set sample similarity" ## ----message=FALSE, fig.cap=cap4, fig.show="hold", fig.height=8, fig.width=15---- groups <- c("Heterograft", "Heterograft", "Heterograft", "Selfgraft", "Selfgraft", "Selfgraft") plotSamplePCA(data = sRNA_data, group = groups, size.ratio = 2) ## ----echo=FALSE--------------------------------------------------------------- cap5 <-"An example of a heatmap, illustrating the sRNA data set sample similarity" ## ----message=FALSE, fig.cap=cap5, fig.show="hold"----------------------------- plotSampleDistance(sRNA_data) ## ----message=FALSE------------------------------------------------------------ # define consensus, store as a data summary file. sRNA_data_dicercall <- RNAdicercall(data = sRNA_data, chimeric = TRUE, genome.ID = "B_", controls = c("selfgraft_1", "selfgraft_2", "selfgraft_3")) ## ----message=FALSE------------------------------------------------------------ # Subset data for analysis: 24-nt sRNAs sRNA_24 <- RNAsubset(sRNA_data_dicercall, class = 24) # Subset data for analysis: 21/22-nt sRNAs sRNA_2122 <- RNAsubset(sRNA_data_dicercall, class = c(21, 22)) ## ----message=FALSE------------------------------------------------------------ consensus_plot <- RNAdistribution(data = sRNA_data_dicercall, style = "bar", data.type = "consensus") ## ----echo=FALSE--------------------------------------------------------------- cap6<-"An example of the distribution of small RNA consensus dicer classifications. " ## ----fig.cap=cap6, fig.height=10, fig.width=15-------------------------------- # view consensus_plot$plot ## ----message=FALSE------------------------------------------------------------ #reorder df controls <- c("selfgraft_1", "selfgraft_2", "selfgraft_3") reorder_df <- RNAreorder(sRNA_data_dicercall, controls) # sample conditions in order within dataframe groups <- c("Selfgraft", "Selfgraft", "Selfgraft", "Heterograft", "Heterograft", "Heterograft") ## Differential analysis of whole dataset: DESeq2 method sRNA_DESeq2 <- RNAdifferentialAnalysis(data = reorder_df, group = groups, method = "DESeq2") ## ----results='asis'----------------------------------------------------------- RNAsummary(sRNA_DESeq2) ## ----results='asis'----------------------------------------------------------- RNAsummary(sRNA_DESeq2, alpha=0.05) ## ----eval = FALSE------------------------------------------------------------- # write.table(sRNA_DESeq2, "./sRNA_core_dataset.txt") ## ----fig.show="hold"---------------------------------------------------------- # summary of statistical sRNA clusters RNAsummary(sRNA_DESeq2, alpha=0.05) # select significant significant_sRNAs <- sRNA_DESeq2[sRNA_DESeq2$padjusted < 0.05, ] ## ----echo = FALSE------------------------------------------------------------- capp1 <- "Heatmap of statistically significant sRNA clusters" ## ----fig.show="hold", message=FALSE------------------------------------------- p1 <- plotHeatmap(significant_sRNAs, value = "RPM", row.names = FALSE, title = "Heatmap of log-transformed FPKM") ## ----fig.show="hold", message=FALSE, fig.cap=capp1---------------------------- p1$plot ## ----message=FALSE------------------------------------------------------------ # select sRNA clusters only found in treatment & not in the control samples gained_sRNA <- RNApopulation(data = sRNA_DESeq2, conditions = c("heterograft_1", "heterograft_2" , "heterograft_3"), chimeric = TRUE, genome.ID = "B_", controls = c("selfgraft_1", "selfgraft_2", "selfgraft_3")) # look at number of sRNA cluster only found in treatment nrow(gained_sRNA) ## ----message=FALSE------------------------------------------------------------ # select sRNA clusters only found in control & not in the treatment samples lost_sRNA <- RNApopulation(data = sRNA_DESeq2, conditions = c("selfgraft_1", "selfgraft_2" , "selfgraft_3"), chimeric = TRUE, genome.ID = "B_", controls = c("selfgraft_1", "selfgraft_2", "selfgraft_3")) # look at number of sRNA cluster only found in control nrow(lost_sRNA) ## ----message=FALSE------------------------------------------------------------ gained_sRNA_sequences <- RNAsequences(gained_sRNA, method = "consensus" ) ## ----message=FALSE------------------------------------------------------------ gained_sRNA_attributes <- RNAattributes(data = gained_sRNA, match ="genes", annotation = output_annotation_file) ## ----message=FALSE------------------------------------------------------------ # vector of control names control_names <- c("selfgraft_1", "selfgraft_2", "selfgraft_3") ## Identify potential tomato mobile molecules mobile_sRNA <- RNAmobile(input = "sRNA", data = sRNA_DESeq2, controls = control_names, genome.ID = "B_", task = "keep", statistical = FALSE) ## ----echo=FALSE--------------------------------------------------------------- cap7 <- "An example heatmap of candidate mobile small RNAs. Where the columns represent the sample replicates and the rows represent the small RNA cluster." ## ----fig.cap=cap7, fig.show="hold" , message=FALSE---------------------------- p2 <- plotHeatmap(mobile_sRNA, row.names = FALSE) p2$plot ## ----eval = FALSE------------------------------------------------------------- # write.table(mobile_sRNA, "./candidate_mobile_sRNAs.txt") ## ----------------------------------------------------------------------------- annotation_file <- system.file("extdata", "prefix_reduced_chr2_Tomato.gff.gz", package="mobileRNA") mobile_attributes <- RNAattributes(data = mobile_sRNA, match ="genes", annotation = annotation_file) ## ----------------------------------------------------------------------------- mobile_features <- RNAfeatures(data = mobile_sRNA, annotation = annotation_file) print(mobile_features) # plot as stacked bar chart features_plot <- plotRNAfeatures(data = sRNA_data, annotation = annotation_file) plot(features_plot) ## ----message=FALSE------------------------------------------------------------ mobile_sequences <- RNAsequences(mobile_sRNA, method = "consensus") ## ----eval = FALSE------------------------------------------------------------- # write.table(mobile_sequences, "./candidate_mobile_sRNA_sequences.txt") ## ----message=FALSE------------------------------------------------------------ # load `dplyr` package library(dplyr) # select the cluster and sequence columns sequences <- mobile_sequences %>% select(Cluster, Sequence) # add prefix, remove row with NA prefix <- ">" sequences$Cluster <- paste0(prefix, sequences$Cluster) sequences <- na.omit(sequences) # convert to required format: res <- do.call(rbind, lapply(seq(nrow(sequences)), function(i) t(sequences[i, ]))) ## ----eval = FALSE, message=FALSE---------------------------------------------- # # save output # write.table(res, file ="./mobile_sRNA_sequences.txt", # row.names = FALSE, col.names = FALSE, quote = FALSE) ## ----eval = FALSE------------------------------------------------------------- # # load `bioseq` package # library(bioseq) # # # build a RNA vector from a character vector: # mobileRNA_seq <- bioseq::rna(mobile_sequences$Sequence) # # # Find a consensus sequence for a set of sequences: # bioseq::seq_consensus(mobileRNA_seq) ## ----eval=FALSE--------------------------------------------------------------- # # names of samples, represented by folder names produced in previous step # samples <- c("[sample names]") # # # location of output folders from previous step # dir <- "[output_dir]" # # # merge information of the detected de novo sRNA clusters # gff_alignment <- GenomicRanges::GRangesList() # for (i in samples) { # file_path <- file.path(dir, i, "Results.gff.gz3") # if (file.exists(file_path)) { # gff_alignment[[i]] <- rtracklayer::import.gff.gz(file_path) # } else{ # Stop("File does not exist:", file_path, "\n") # } # } # # gff_merged <- GenomicRanges::reduce(unlist(gff_alignment), # ignore.strand = TRUE) # # gff_merged <- as.data.frame(gff_merged) # colnames(gff_merged)[1] <- "chr" # if('*' %in% gff_merged$strand){ # gff_merged <- gff_merged[, -match("strand", colnames(gff_merged))] # } # # locifile_txt <- data.frame(Locus = paste0(gff_merged$chr, ":", # gff_merged$start,"-", # gff_merged$end), # Cluster = paste0("cluster_", # seq_len(nrow(gff_merged)))) # # # save output to location # dir_locifile <- "[output directory]" # loci_out <- file.path(dir_locifile,"locifile.txt") # utils::write.table(locifile_txt, file = loci_out, quote = FALSE, # sep = "\t", row.names = FALSE, col.names = TRUE) # ## ----eval=FALSE--------------------------------------------------------------- # # names of samples, represented by folder names produced in previous step # samples <- c("[sample names]") # # # location of output folders from previous step # dir <- "[output_dir]" # # # merge information of the detected de novo sRNA clusters # gff_alignment <- GenomicRanges::GRangesList() # for (i in samples) { # file_path <- file.path(dir, i, "Results.gff.gz3") # if (file.exists(file_path)) { # gff_alignment[[i]] <- rtracklayer::import.gff.gz(file_path) # } else{ # Stop("File does not exist:", file_path, "\n") # } # } # # gff_merged <- GenomicRanges::reduce(unlist(gff_alignment), # ignore.strand = TRUE) # # gff_merged <- as.data.frame(gff_merged) # colnames(gff_merged)[1] <- "chr" # if('*' %in% gff_merged$strand){ # gff_merged <- gff_merged[, -match("strand", colnames(gff_merged))] # } # # locifile_txt <- data.frame(Locus = paste0(gff_merged$chr, ":", # gff_merged$start,"-", # gff_merged$end), # Cluster = paste0("cluster_", # seq_len(nrow(gff_merged)))) # # # save output to location # dir_locifile <- "[output directory]" # loci_out <- file.path(dir_locifile,"locifile.txt") # utils::write.table(locifile_txt, file = loci_out, quote = FALSE, # sep = "\t", row.names = FALSE, col.names = TRUE) ## ----------------------------------------------------------------------------- sessionInfo()