## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
fig.align='center',
external=TRUE,
echo=TRUE,
warning=FALSE,
comment = "#>"
)
## ----echo=FALSE---------------------------------------------------------------
cap01<-"Comprehensive diagram of complete mobileRNA workflows. "
## ----fig.align="centre", echo=FALSE, fig.cap=cap01----------------------------
knitr::include_graphics("../man/figures/mobileRNA_graphic_2.png")
## ----eval=FALSE, message=FALSE------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("mobileRNA")
## ----eval=FALSE, message=FALSE------------------------------------------------
# if (!require("devtools")) install.packages("devtools")
# devtools::install_github("KJeynesCupper/mobileRNA", ref = "main")
## ----message=FALSE------------------------------------------------------------
library("mobileRNA")
## ----Load, message=FALSE------------------------------------------------------
# Load small RNA data
data("sRNA_data")
# Load messenger RNA data
data("mRNA_data")
## ----message=FALSE------------------------------------------------------------
fasta_1 <- system.file("extdata","reduced_chr12_Eggplant.fa.gz",
package="mobileRNA")
fasta_2 <-system.file("extdata","reduced_chr2_Tomato.fa.gz",
package="mobileRNA")
# define temporary output directory - replace with your directory
output_assembly_file <- file.path(tempfile("merged_assembly",
fileext = ".fa"))
# merge
merged_reference <- RNAmergeGenomes(genomeA = fasta_1,
genomeB = fasta_2,
output_file = output_assembly_file)
## ----message=FALSE------------------------------------------------------------
anno1 <- system.file("extdata","reduced_chr12_Eggplant.gff.gz",
package="mobileRNA")
anno2 <- system.file("extdata","reduced_chr2_Tomato.gff.gz",
package="mobileRNA")
# define temporary output directory
output_annotation_file <- file.path(tempfile("merged_annotation",
fileext = ".gff3"))
# merge annotation files into a single file
merged_annotation <- RNAmergeAnnotations(annotationA = anno1,
annotationB = anno2,
output_file = output_annotation_file)
## ----eval=FALSE---------------------------------------------------------------
# # directory containing only sRNAseq samples
# samples <- system.file("extdata/sRNAseq",package="mobileRNA")
#
# # output location
# output_location <- tempdir()
#
# mapRNA(input = "sRNA",
# input_files_dir = samples,
# output_dir = output_location,
# genomefile = output_assembly_file,
# condaenv = "/Users/user-name/miniconda3/envs/ShortStack4",
# mmap = "n")
## ----eval=FALSE---------------------------------------------------------------
# # Directory containing results
# results_dir <- file.path(output_location,"2_sRNA_results")
#
# # Sample names and total number of reads, in the same order.
# sample_names <- c("selfgraft_demo_1", "selfgraft_demo_2",
# "selfgraft_demo_3", "heterograft_demo_1",
# "heterograft_demo_2", "heterograft_demo_3")
#
#
# sRNA_data_demo <- RNAimport(input = "sRNA",
# directory = results_dir,
# samples = sample_names)
## -----------------------------------------------------------------------------
data("sRNA_data")
## ----message=FALSE------------------------------------------------------------
# define control samples
controls <- c("selfgraft_1", "selfgraft_2", "selfgraft_3")
mobile_sRNA <- RNAmobile(input = "sRNA",
data = sRNA_data,
controls = controls,
genome.ID = "B",
task = "keep")
## ----eval=FALSE---------------------------------------------------------------
# # save output as txt file
# write.table(mobile_sRNA, "./sRNA_mobile_output.txt")
## ----echo=FALSE---------------------------------------------------------------
cap1 <- "An example facet line graph to show the distribution of sRNA classes within each sample."
cap2 <- "An example facet bar graph to show the distribution of sRNA classes within each sample."
## -----------------------------------------------------------------------------
# plot each replicate as a line, separately, and facet
sample_distribution_line <- RNAdistribution(sRNA_data,
style = "line",
overlap = FALSE,
facet = TRUE,
colour = "darkgreen")
# plot each replicate as a bar, separately, and facet
sample_distribution_bar <- RNAdistribution(sRNA_data,
style = "bar",
facet = TRUE,
colour ="lightblue")
## ----fig.cap=cap1, fig.show="hold", fig.height=10, fig.width=15---------------
# View plot (only)
sample_distribution_line$plot
## ----fig.cap=cap2, fig.show="hold", fig.height=10, fig.width=15---------------
# View plot (only)
sample_distribution_bar$plot
## ----echo=FALSE---------------------------------------------------------------
cap4 <-"An example of a PCA, illustrating the sRNA data set sample similarity"
## ----message=FALSE, fig.cap=cap4, fig.show="hold", fig.height=8, fig.width=15----
groups <- c("Heterograft", "Heterograft", "Heterograft",
"Selfgraft", "Selfgraft", "Selfgraft")
plotSamplePCA(data = sRNA_data, group = groups, size.ratio = 2)
## ----echo=FALSE---------------------------------------------------------------
cap5 <-"An example of a heatmap, illustrating the sRNA data set sample similarity"
## ----message=FALSE, fig.cap=cap5, fig.show="hold"-----------------------------
plotSampleDistance(sRNA_data)
## ----message=FALSE------------------------------------------------------------
# define consensus, store as a data summary file.
sRNA_data_dicercall <- RNAdicercall(data = sRNA_data,
chimeric = TRUE,
genome.ID = "B_",
controls = c("selfgraft_1",
"selfgraft_2",
"selfgraft_3"))
## ----message=FALSE------------------------------------------------------------
# Subset data for analysis: 24-nt sRNAs
sRNA_24 <- RNAsubset(sRNA_data_dicercall, class = 24)
# Subset data for analysis: 21/22-nt sRNAs
sRNA_2122 <- RNAsubset(sRNA_data_dicercall, class = c(21, 22))
## ----message=FALSE------------------------------------------------------------
consensus_plot <- RNAdistribution(data = sRNA_data_dicercall,
style = "bar",
data.type = "consensus")
## ----echo=FALSE---------------------------------------------------------------
cap6<-"An example of the distribution of small RNA consensus dicer classifications. "
## ----fig.cap=cap6, fig.height=10, fig.width=15--------------------------------
# view
consensus_plot$plot
## ----message=FALSE------------------------------------------------------------
#reorder df
controls <- c("selfgraft_1", "selfgraft_2", "selfgraft_3")
reorder_df <- RNAreorder(sRNA_data_dicercall, controls)
# sample conditions in order within dataframe
groups <- c("Selfgraft", "Selfgraft", "Selfgraft",
"Heterograft", "Heterograft", "Heterograft")
## Differential analysis of whole dataset: DESeq2 method
sRNA_DESeq2 <- RNAdifferentialAnalysis(data = reorder_df,
group = groups,
method = "DESeq2")
## ----results='asis'-----------------------------------------------------------
RNAsummary(sRNA_DESeq2)
## ----results='asis'-----------------------------------------------------------
RNAsummary(sRNA_DESeq2, alpha=0.05)
## ----eval = FALSE-------------------------------------------------------------
# write.table(sRNA_DESeq2, "./sRNA_core_dataset.txt")
## ----fig.show="hold"----------------------------------------------------------
# summary of statistical sRNA clusters
RNAsummary(sRNA_DESeq2, alpha=0.05)
# select significant
significant_sRNAs <- sRNA_DESeq2[sRNA_DESeq2$padjusted < 0.05, ]
## ----echo = FALSE-------------------------------------------------------------
capp1 <- "Heatmap of statistically significant sRNA clusters"
## ----fig.show="hold", message=FALSE-------------------------------------------
p1 <- plotHeatmap(significant_sRNAs, value = "RPM", row.names = FALSE,
title = "Heatmap of log-transformed FPKM")
## ----fig.show="hold", message=FALSE, fig.cap=capp1----------------------------
p1$plot
## ----message=FALSE------------------------------------------------------------
# select sRNA clusters only found in treatment & not in the control samples
gained_sRNA <- RNApopulation(data = sRNA_DESeq2,
conditions = c("heterograft_1",
"heterograft_2" ,
"heterograft_3"),
chimeric = TRUE,
genome.ID = "B_",
controls = c("selfgraft_1",
"selfgraft_2",
"selfgraft_3"))
# look at number of sRNA cluster only found in treatment
nrow(gained_sRNA)
## ----message=FALSE------------------------------------------------------------
# select sRNA clusters only found in control & not in the treatment samples
lost_sRNA <- RNApopulation(data = sRNA_DESeq2,
conditions = c("selfgraft_1",
"selfgraft_2" ,
"selfgraft_3"),
chimeric = TRUE,
genome.ID = "B_",
controls = c("selfgraft_1",
"selfgraft_2",
"selfgraft_3"))
# look at number of sRNA cluster only found in control
nrow(lost_sRNA)
## ----message=FALSE------------------------------------------------------------
gained_sRNA_sequences <- RNAsequences(gained_sRNA, method = "consensus" )
## ----message=FALSE------------------------------------------------------------
gained_sRNA_attributes <- RNAattributes(data = gained_sRNA,
match ="genes",
annotation = output_annotation_file)
## ----message=FALSE------------------------------------------------------------
# vector of control names
control_names <- c("selfgraft_1", "selfgraft_2", "selfgraft_3")
## Identify potential tomato mobile molecules
mobile_sRNA <- RNAmobile(input = "sRNA",
data = sRNA_DESeq2,
controls = control_names,
genome.ID = "B_",
task = "keep",
statistical = FALSE)
## ----echo=FALSE---------------------------------------------------------------
cap7 <- "An example heatmap of candidate mobile small RNAs. Where the columns represent the sample replicates and the rows represent the small RNA cluster."
## ----fig.cap=cap7, fig.show="hold" , message=FALSE----------------------------
p2 <- plotHeatmap(mobile_sRNA, row.names = FALSE)
p2$plot
## ----eval = FALSE-------------------------------------------------------------
# write.table(mobile_sRNA, "./candidate_mobile_sRNAs.txt")
## -----------------------------------------------------------------------------
annotation_file <- system.file("extdata",
"prefix_reduced_chr2_Tomato.gff.gz",
package="mobileRNA")
mobile_attributes <- RNAattributes(data = mobile_sRNA,
match ="genes",
annotation = annotation_file)
## -----------------------------------------------------------------------------
mobile_features <- RNAfeatures(data = mobile_sRNA,
annotation = annotation_file)
print(mobile_features)
# plot as stacked bar chart
features_plot <- plotRNAfeatures(data = sRNA_data,
annotation = annotation_file)
plot(features_plot)
## ----message=FALSE------------------------------------------------------------
mobile_sequences <- RNAsequences(mobile_sRNA, method = "consensus")
## ----eval = FALSE-------------------------------------------------------------
# write.table(mobile_sequences, "./candidate_mobile_sRNA_sequences.txt")
## ----message=FALSE------------------------------------------------------------
# load `dplyr` package
library(dplyr)
# select the cluster and sequence columns
sequences <- mobile_sequences %>% select(Cluster, Sequence)
# add prefix, remove row with NA
prefix <- ">"
sequences$Cluster <- paste0(prefix, sequences$Cluster)
sequences <- na.omit(sequences)
# convert to required format:
res <- do.call(rbind, lapply(seq(nrow(sequences)),
function(i) t(sequences[i, ])))
## ----eval = FALSE, message=FALSE----------------------------------------------
# # save output
# write.table(res, file ="./mobile_sRNA_sequences.txt",
# row.names = FALSE, col.names = FALSE, quote = FALSE)
## ----eval = FALSE-------------------------------------------------------------
# # load `bioseq` package
# library(bioseq)
#
# # build a RNA vector from a character vector:
# mobileRNA_seq <- bioseq::rna(mobile_sequences$Sequence)
#
# # Find a consensus sequence for a set of sequences:
# bioseq::seq_consensus(mobileRNA_seq)
## ----eval=FALSE---------------------------------------------------------------
# # names of samples, represented by folder names produced in previous step
# samples <- c("[sample names]")
#
# # location of output folders from previous step
# dir <- "[output_dir]"
#
# # merge information of the detected de novo sRNA clusters
# gff_alignment <- GenomicRanges::GRangesList()
# for (i in samples) {
# file_path <- file.path(dir, i, "Results.gff.gz3")
# if (file.exists(file_path)) {
# gff_alignment[[i]] <- rtracklayer::import.gff.gz(file_path)
# } else{
# Stop("File does not exist:", file_path, "\n")
# }
# }
#
# gff_merged <- GenomicRanges::reduce(unlist(gff_alignment),
# ignore.strand = TRUE)
#
# gff_merged <- as.data.frame(gff_merged)
# colnames(gff_merged)[1] <- "chr"
# if('*' %in% gff_merged$strand){
# gff_merged <- gff_merged[, -match("strand", colnames(gff_merged))]
# }
#
# locifile_txt <- data.frame(Locus = paste0(gff_merged$chr, ":",
# gff_merged$start,"-",
# gff_merged$end),
# Cluster = paste0("cluster_",
# seq_len(nrow(gff_merged))))
#
# # save output to location
# dir_locifile <- "[output directory]"
# loci_out <- file.path(dir_locifile,"locifile.txt")
# utils::write.table(locifile_txt, file = loci_out, quote = FALSE,
# sep = "\t", row.names = FALSE, col.names = TRUE)
#
## ----eval=FALSE---------------------------------------------------------------
# # names of samples, represented by folder names produced in previous step
# samples <- c("[sample names]")
#
# # location of output folders from previous step
# dir <- "[output_dir]"
#
# # merge information of the detected de novo sRNA clusters
# gff_alignment <- GenomicRanges::GRangesList()
# for (i in samples) {
# file_path <- file.path(dir, i, "Results.gff.gz3")
# if (file.exists(file_path)) {
# gff_alignment[[i]] <- rtracklayer::import.gff.gz(file_path)
# } else{
# Stop("File does not exist:", file_path, "\n")
# }
# }
#
# gff_merged <- GenomicRanges::reduce(unlist(gff_alignment),
# ignore.strand = TRUE)
#
# gff_merged <- as.data.frame(gff_merged)
# colnames(gff_merged)[1] <- "chr"
# if('*' %in% gff_merged$strand){
# gff_merged <- gff_merged[, -match("strand", colnames(gff_merged))]
# }
#
# locifile_txt <- data.frame(Locus = paste0(gff_merged$chr, ":",
# gff_merged$start,"-",
# gff_merged$end),
# Cluster = paste0("cluster_",
# seq_len(nrow(gff_merged))))
#
# # save output to location
# dir_locifile <- "[output directory]"
# loci_out <- file.path(dir_locifile,"locifile.txt")
# utils::write.table(locifile_txt, file = loci_out, quote = FALSE,
# sep = "\t", row.names = FALSE, col.names = TRUE)
## -----------------------------------------------------------------------------
sessionInfo()