Reproduce iDEP analyses with auto-generated R Markdown

Jul 8, 2018 14 min read 0 Comments R, RNA-Seq, Bioinformatics

iDEP is a tool for interactive analysis of RNA-Seq data. After analyzing their data, users have the option to download a R Markdown file, toegether with other data files, to reproduce the entire workflow. This blog is both the easiest and the hardest of all. It is easy because everything below was automatically generated by iDEP; it is hard as I have to write code to write code when developing iDEP.

1. Read data

R packages and iDEP core Functions. Users can also download the iDEP_core_functions.R file. Many R packages needs to be installed first. This may take hours. Each of these packages took years to develop.So be a patient thief. Sometimes dependencies needs to be installed manually. If you are using an older version of R, and having trouble with package installation, try un-install the current version of R, delete all folders and files, and reinstall from scratch.

 if(file.exists('iDEP_core_functions.R'))
	source('iDEP_core_functions.R') else 
    source('https://raw.githubusercontent.com/iDEP-SDSU/idep/master/shinyapps/idep/iDEP_core_functions.R')

We are using the downloaded gene expression file where gene IDs has been converted to Ensembl gene IDs. This is because the ID conversion database is too large to download. You can use your original file if your file uses Ensembl ID, or you do not want to use the pathway files available in iDEP (or it is not available).

 inputFile <- 'C:/Users/Xijin.Ge/Downloads/Downloaded_Converted_Data.csv'  # Expression matrix
 sampleInfoFile <- 'C:/Users/Xijin.Ge/Downloads/Downloaded_sampleInfoFile.csv' # Experiment design file 
 geneInfoFile <- 'C:/Users/Xijin.Ge/Downloads/Mouse__mmusculus_gene_ensembl_GeneInfo.csv' #Gene symbols, location etc. 
 geneSetFile <- 'C:/Users/Xijin.Ge/Downloads/Mouse__mmusculus_gene_ensembl.db'  # pathway database in SQL; can be GMT format 
 STRING10_speciesFile <- 'https://raw.githubusercontent.com/iDEP-SDSU/idep/master/shinyapps/idep/STRING10_species.csv'

Parameters for reading data

 input_missingValue <- 'geneMedian'	#Missing values imputation method
 input_dataFileFormat <- 1	#1- read counts, 2 FKPM/RPKM or DNA microarray
 input_minCounts <- 0.5	#Min counts
 input_NminSamples <- 1	#Minimum number of samples 
 input_countsLogStart <- 4	#Pseudo count for log CPM
 input_CountsTransform <- 1	#Methods for data transformation of counts. 1-EdgeR's logCPM 2-VST, 3-rlog

 readData.out <- readData(inputFile) 
 library(knitr)   #  install if needed. for showing tables with kable 
 kable( head(readData.out$data) )    # show the first few rows of data

	p53_mock_1	p53_mock_2	p53_mock_3	p53_mock_4	p53_IR_1	p53_IR_2	p53_IR_3	p53_IR_4	null_mock_1	null_mock_2	null_IR_1	null_IR_2
ENSMUSG00000075014	5.986190	10.245204	11.622005	12.445437	19.58986	21.07925	8.279954	17.40506	13.23741	12.91427	6.949322	14.57512
ENSMUSG00000076617	18.174331	18.020764	17.901045	16.892389	18.06544	18.52019	16.933927	17.68636	17.18909	17.39665	17.663020	17.21374
ENSMUSG00000015656	15.349228	15.086274	15.392536	14.817324	15.95674	15.75896	16.233039	16.04712	15.03582	14.98886	18.603855	17.00075
ENSMUSG00000052305	13.766147	11.436670	10.523028	15.927563	10.57175	11.22625	10.125088	10.20851	11.81668	17.59129	10.311098	15.82093
ENSMUSG00000069917	13.511114	11.202619	10.240337	15.915347	10.33765	11.20676	10.280270	10.32910	11.51218	17.41403	10.236792	15.73450
ENSMUSG00000075015	4.023733	7.791965	9.161421	9.809036	16.98655	18.47457	5.767006	14.96939	10.59269	10.25800	4.742948	12.00540

 readSampleInfo.out <- readSampleInfo(sampleInfoFile) 
 kable( readSampleInfo.out )

	p53	Treatment
p53_mock_1	wt	mock
p53_mock_2	wt	mock
p53_mock_3	wt	mock
p53_mock_4	wt	mock
p53_IR_1	wt	IR
p53_IR_2	wt	IR
p53_IR_3	wt	IR
p53_IR_4	wt	IR
null_mock_1	null	mock
null_mock_2	null	mock
null_IR_1	null	IR
null_IR_2	null	IR

 input_selectOrg ="NEW" 
 input_selectGO <- 'KEGG'	#Gene set category 
 input_noIDConversion = TRUE  
 allGeneInfo.out <- geneInfo(geneInfoFile) 
 converted.out = NULL 
 convertedData.out <- convertedData()	 
 nGenesFilter()

## [1] "6882 genes in 12 samples. 6882  genes passed filter.\n Original gene IDs used."

 convertedCounts.out <- convertedCounts()  # converted counts, just for compatibility 
 readCountsBias()  # detecting bias in sequencing depth

## [1] "Warning! Sequencing depth bias detected. Total read counts are significantly different among sample groups (p= 1.04e-02 ) based on ANOVA."

2. Pre-process

# Read counts per library 
 parDefault = par() 
 par(mar=c(12,4,2,2)) 
 # barplot of total read counts
 x <- readData.out$rawCounts
 groups = as.factor( detectGroups(colnames(x ) ) )
 if(nlevels(groups)<=1 | nlevels(groups) >20 )  
  col1 = 'green'  else
  col1 = rainbow(nlevels(groups))[ groups ]				
		 
 barplot( colSums(x)/1e6, 
		col=col1,las=3, main="Total read counts (millions)")

 # Box plot 
 x = readData.out$data 
 boxplot(x, las = 2, col=col1,
    ylab='Transformed expression levels',
    main='Distribution of transformed data')

 #Density plot 
 par(parDefault) 
 densityPlot()

 # Scatter plot of the first two samples 
 plot(x[,1:2],xlab=colnames(x)[1],ylab=colnames(x)[2], 
    main='Scatter plot of first two samples')

 ####plot gene or gene family
 input_selectOrg ="BestMatch" 
 input_geneSearch <- 'HOXA'	#Gene ID for searching 
 genePlot()

 input_useSD <- 'FALSE'	#Use standard deviation instead of standard error in error bar? 
 geneBarPlotError()

3. Heatmap

 # hierarchical clustering tree
 x <- readData.out$data
 maxGene <- apply(x,1,max)
 # remove bottom 25% lowly expressed genes, which inflate the PPC
 x <- x[which(maxGene > quantile(maxGene)[1] ) ,] 
 plot(as.dendrogram(hclust2( dist2(t(x)))), ylab="1 - Pearson C.C.", type = "rectangle")

 #Correlation matrix
 input_labelPCC <- TRUE	#Show correlation coefficient? 
 correlationMatrix()

 # Parameters for heatmap
 input_nGenes <- 1000	#Top genes for heatmap
 input_geneCentering <- TRUE	#centering genes ?
 input_sampleCentering <- FALSE	#Center by sample?
 input_geneNormalize <- FALSE	#Normalize by gene?
 input_sampleNormalize <- FALSE	#Normalize by sample?
 input_noSampleClustering <- FALSE	#Use original sample order
 input_heatmapCutoff <- 4	#Remove outliers beyond number of SDs 
 input_distFunctions <- 1	#which distant funciton to use
 input_hclustFunctions <- 1	#Linkage type
 input_heatColors1 <- 1	#Colors
 input_selectFactorsHeatmap <- 'Sample_Name'	#Sample coloring factors 
 #png('heatmap.png', width = 10, height = 15, units = 'in', res = 300) 
 #staticHeatmap() 
 #dev.off()

heatmap

# heatmapPlotly() # interactive heatmap using Plotly

4. K-means clustering

 input_nGenesKNN <- 2000	#Number of genes fro k-Means
 input_nClusters <- 4	#Number of clusters 
 maxGeneClustering = 12000
 input_kmeansNormalization <- 'geneMean'	#Normalization
 input_KmeansReRun <- 0	#Random seed 

 distributionSD()  #Distribution of standard deviations

 KmeansNclusters()  #Number of clusters

 Kmeans.out = Kmeans()   #Running K-means 
 KmeansHeatmap()   #Heatmap for k-Means

 #Read gene sets for enrichment analysis 
 sqlite  <- dbDriver('SQLite')
 input_selectGO3 <- 'GOBP'	#Gene set category
 input_minSetSize <- 15	#Min gene set size
 input_maxSetSize <- 2000	#Max gene set size 
 GeneSets.out <-readGeneSets( geneSetFile,
    convertedData.out, input_selectGO3,input_selectOrg,
    c(input_minSetSize, input_maxSetSize)  )  
 # Alternatively, users can use their own GMT files by
 #GeneSets.out <- readGMTRobust('somefile.GMT')  
 results <- KmeansGO()  #Enrichment analysis for k-Means clusters	
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

Cluster	adj.Pval	Genes	Pathways
A	2.69e-144	121	RNA processing
	1.99e-140	106	Ribonucleoprotein complex biogenesis
	2.29e-134	103	NcRNA metabolic process
	3.50e-117	84	Ribosome biogenesis
	6.34e-115	84	NcRNA processing
	3.48e-107	144	Organonitrogen compound metabolic process
	1.71e-101	119	Organonitrogen compound biosynthetic process
	6.86e-93	65	RRNA processing
	2.89e-92	65	RRNA metabolic process
	5.05e-75	74	Translation
B	2.08e-25	36	Cellular response to stress
	4.56e-23	36	Apoptotic process
	4.56e-23	37	Cell death
	5.85e-23	36	Programmed cell death
	5.85e-23	25	Apoptotic signaling pathway
	1.23e-20	24	Positive regulation of cell death
	1.44e-20	23	Cellular response to DNA damage stimulus
	3.58e-19	18	Intrinsic apoptotic signaling pathway
	1.01e-18	22	Positive regulation of programmed cell death
	1.18e-18	14	Intrinsic apoptotic signaling pathway in response to DNA damage
C	2.39e-63	115	Immune system process
	8.23e-45	86	Positive regulation of response to stimulus
	1.18e-44	76	Immune response
	1.51e-42	81	Establishment of protein localization
	7.23e-42	76	Macromolecular complex assembly
	2.29e-40	69	Regulation of immune system process
	2.29e-40	69	Protein complex assembly
	2.29e-40	75	Protein transport
	2.29e-40	69	Protein complex biogenesis
	2.29e-40	72	Protein complex subunit organization
D	9.62e-94	147	Immune system process
	6.16e-55	99	Positive regulation of response to stimulus
	5.38e-52	93	Positive regulation of molecular function
	6.50e-52	93	Protein phosphorylation
	1.86e-51	84	Defense response
	7.25e-51	86	Regulation of intracellular signal transduction
	1.02e-49	72	Cell activation
	2.24e-47	88	Regulation of transport
	1.00e-46	95	Response to external stimulus
	5.13e-46	73	Vesicle-mediated transport

 input_seedTSNE <- 0	#Random seed for t-SNE
 input_colorGenes <- TRUE	#Color genes in t-SNE plot? 
 tSNEgenePlot()  #Plot genes using t-SNE

## Warning: package 'Rtsne' was built under R version 3.5.1

5. PCA and beyond

 input_selectFactors <- 'Sample_Name'	#Factor coded by color
 input_selectFactors2 <- 'Sample_Name'	#Factor coded by shape
 input_tsneSeed2 <- 0	#Random seed for t-SNE 
 #PCA, MDS and t-SNE plots
 PCAplot()

 MDSplot()

 tSNEplot()

 #Read gene sets for pathway analysis using PGSEA on principal components 
 input_selectGO6 <- 'GOBP' 
 GeneSets.out <-readGeneSets( geneSetFile,
    convertedData.out, input_selectGO6,input_selectOrg,
    c(input_minSetSize, input_maxSetSize)  )  
 PCApathway() # Run PGSEA analysis

 cat( PCA2factor() )   #The correlation between PCs with factors

## 
##  Correlation between Principal Components (PCs) with factors
## PC1 is correlated with Treatment (p=6.56e-05).

6. Differentially Expressed Genes (DEG1)

 input_CountsDEGMethod <- 3	#DESeq2= 3,limma-voom=2,limma-trend=1 
 input_limmaPval <- 0.1	#FDR cutoff
 input_limmaFC <- 2	#Fold-change cutoff
 input_selectModelComprions <- c('p53: wt vs. null','Treatment: IR vs. mock')	#Selected comparisons
 input_selectFactorsModel <- c('p53','Treatment')	#Selected comparisons
 input_selectInteractions <- 'p53:Treatment'	#Selected comparisons
 input_selectBlockFactorsModel <- NULL 	#Selected comparisons
 factorReferenceLevels.out <- c('p53:null','Treatment:mock') 

 limma.out <- limma()
 DEG.data.out <- DEG.data()
 limma.out$comparisons

## [1] "wt-null"               "IR-mock"               "I:p53_wt.Treatment_IR"
## [4] "wt-null_for_IR"        "IR-mock_for_wt"

 input_selectComparisonsVenn = limma.out$comparisons[1:3] # use first three comparisons
 input_UpDownRegulated <- FALSE	#Split up and down regulated genes 
 vennPlot() # Venn diagram

  sigGeneStats() # number of DEGs as figure

  sigGeneStatsTable() # number of DEGs as table

##                                 Comparisons   Up Down
## wt-null                             wt-null   20   16
## IR-mock                             IR-mock 1532  702
## I:p53_wt-Treatment_IR I:p53_wt-Treatment_IR  694  478
## wt-null_for_IR               wt-null_for_IR 1118  577
## IR-mock_for_wt               IR-mock_for_wt 1856  487

7. Differentially Expressed Genes (DEG1)

 input_selectContrast <- 'I:p53_wt.Treatment_IR'	#Selected comparisons 
 selectedHeatmap.data.out <- selectedHeatmap.data()
 selectedHeatmap()   # heatmap for DEGs in selected comparison

 # Save gene lists and data into files
 write.csv( selectedHeatmap.data()$genes, 'heatmap.data.csv') 
 write.csv(DEG.data(),'DEG.data.csv' )
 write(AllGeneListsGMT() ,'AllGeneListsGMT.gmt')

 input_selectGO2 <- 'GSKB.TF.Target'	#Gene set category 
 geneListData.out <- geneListData()  
 volcanoPlot()

# scatterPlot()

  MAplot()

  geneListGOTable.out <- geneListGOTable()  
 # Read pathway data again 
 GeneSets.out <-readGeneSets( geneSetFile,
    convertedData.out, input_selectGO2,input_selectOrg,
    c(input_minSetSize, input_maxSetSize)  ) 
 input_removeRedudantSets <- TRUE	#Remove highly redundant gene sets? 
 results <- geneListGO()  #Enrichment analysis for k-Means clusters	
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

Direction	adj.Pval	nGenes	Pathways
Down regulated	3.3e-61	95	Organonitrogen compound metabolic process
	4.8e-53	84	Small molecule metabolic process
	1.2e-51	75	Cell cycle
	1.4e-48	62	DNA metabolic process
	3.4e-45	67	Organonitrogen compound biosynthetic process
	6.7e-45	61	Cell cycle process
	7.2e-43	71	Macromolecular complex assembly
	2.5e-40	68	Cellular response to stress
	3.0e-40	52	Mitotic cell cycle
	7.6e-39	49	Mitotic cell cycle process
Up regulated	6.0e-41	82	Tissue development
	6.8e-39	79	Regulation of transcription from RNA polymerase II promoter
	6.8e-39	80	Transcription from RNA polymerase II promoter
	6.8e-39	85	Apoptotic process
	6.8e-39	84	Cell development
	1.5e-38	85	Programmed cell death
	7.7e-38	86	Cell death
	2.1e-37	78	Positive regulation of nitrogen compound metabolic process
	5.5e-36	74	Positive regulation of nucleobase-containing compound metabolic process
	1.3e-35	86	Cellular response to organic substance

STRING-db API access. We need to find the taxonomy id of your species, this used by STRING. First we try to guess the ID based on iDEP’s database. Users can also skip this step and assign NCBI taxonomy id directly by findTaxonomyID.out = 10090 # mouse 10090, human 9606 etc.

 STRING10_species = read.csv(STRING10_speciesFile)  
 ix = grep('Mus musculus', STRING10_species$official_name )
 findTaxonomyID.out <- STRING10_species[ix,1] # find taxonomyID
 findTaxonomyID.out

## [1] 10090

Enrichment analysis using STRING

  STRINGdb_geneList.out <- STRINGdb_geneList() #convert gene lists

## Warning:  we couldn't map to STRING 8% of your identifiers

 input_STRINGdbGO <- 'Process'	#'Process', 'Component', 'Function', 'KEGG', 'Pfam', 'InterPro' 
 results <- stringDB_GO_enrichmentData()  # enrichment using STRING	
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

Direction	adj.Pval	nGenes	Pathways
Up regulated	4.5e-11	66	intracellular signal transduction
	2.6e-10	51	apoptotic process
	4.5e-10	51	programmed cell death
	9.8e-10	51	cell death
	9.8e-10	51	death
	2.2e-09	71	tissue development
	4.1e-09	26	apoptotic signaling pathway
	5.2e-09	20	intrinsic apoptotic signaling pathway
	9.7e-09	15	intrinsic apoptotic signaling pathway in response to DNA damage
	1.7e-08	58	regulation of apoptotic process
	3.2e-08	65	cell development
	3.7e-08	62	positive regulation of molecular function
	5.2e-08	47	negative regulation of signal transduction
	5.2e-08	62	cell surface receptor signaling pathway
	5.2e-08	57	regulation of programmed cell death
Down regulated	2.2e-15	60	cell cycle
	5.2e-15	25	protein folding
	3.1e-14	65	small molecule metabolic process
	3.1e-14	33	small molecule biosynthetic process
	1.1e-13	53	single-organism biosynthetic process
	3.4e-13	61	organonitrogen compound metabolic process
	5.2e-13	46	organonitrogen compound biosynthetic process
	2.4e-12	38	mitotic cell cycle
	2.4e-12	46	cell cycle process
	3.1e-12	37	mitotic cell cycle process
	7.6e-12	30	ncRNA metabolic process
	2.8e-11	24	organic acid biosynthetic process
	2.8e-11	24	carboxylic acid biosynthetic process
	5.0e-11	33	cell division
	8.6e-11	37	DNA metabolic process

PPI network retrieval and analysis

 input_nGenesPPI <- 100	#Number of top genes for PPI retrieval and analysis 
 stringDB_network1(1) #Show PPI network

Generating interactive PPI

 write(stringDB_network_link(), 'PPI_results.html') # write results to html file

## Warning:  we couldn't map to STRING 8% of your identifiers

 browseURL('PPI_results.html') # open in browser

8. Pathway analysis

 input_selectContrast1 <- 'I:p53_wt.Treatment_IR'	#select Comparison 
 #input_selectContrast1 = limma.out$comparisons[3] # manually set
 input_selectGO <- 'KEGG'	#Gene set category 
 #input_selectGO='custom' # if custom gmt file
 input_minSetSize <- 15	#Min size for gene set
 input_maxSetSize <- 2000	#Max size for gene set 
 # Read pathway data again 
 GeneSets.out <-readGeneSets( geneSetFile,
    convertedData.out, input_selectGO,input_selectOrg,
    c(input_minSetSize, input_maxSetSize)  ) 
 input_pathwayPvalCutoff <- 0.2	#FDR cutoff
 input_nPathwayShow <- 30	#Top pathways to show
 input_absoluteFold <- FALSE	#Use absolute values of fold-change?
 input_GenePvalCutoff <- 1	#FDR to remove genes 

 input_pathwayMethod = 1  # 1  GAGE
 gagePathwayData.out <- gagePathwayData()  # pathway analysis using GAGE  
   
 results <- gagePathwayData.out  #Enrichment analysis for k-Means clusters	
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

Direction	GAGE analysis: I:p53_wt.Treatment_IR	statistic	Genes	adj.Pval
Down	Ribosome biogenesis in eukaryotes	-4.6634	40	2.3e-03
	RNA transport	-4.2098	62	2.9e-03
	Spliceosome	-4.0557	39	4.5e-03
	Protein processing in endoplasmic reticulum	-3.9119	54	4.5e-03
	Metabolic pathways	-3.7276	449	4.5e-03
	DNA replication	-3.4728	20	3.5e-02
	Proteasome	-3.221	23	4.1e-02
	Aminoacyl-tRNA biosynthesis	-3.2197	17	5.0e-02
	Pyrimidine metabolism	-3.1178	50	4.0e-02
	Biosynthesis of amino acids	-3.1022	32	4.1e-02
	Carbon metabolism	-2.9154	47	5.0e-02
	Oxidative phosphorylation	-2.5729	46	1.1e-01
	Ribosome	-2.5555	28	1.2e-01
	Oocyte meiosis	-2.5519	39	1.1e-01
	Progesterone-mediated oocyte maturation	-2.2879	27	2.0e-01

 pathwayListData.out = pathwayListData() 
 enrichmentPlot(pathwayListData.out, 25  )

## Warning: package 'dendextend' was built under R version 3.5.1

  enrichmentNetwork(pathwayListData.out )

## Warning: package 'igraph' was built under R version 3.5.1

#  enrichmentNetworkPlotly(pathwayListData.out)

 input_pathwayMethod = 3  # 1  fgsea 
 fgseaPathwayData.out <- fgseaPathwayData() #Pathway analysis using fgsea 
 results <- fgseaPathwayData.out  #Enrichment analysis for k-Means clusters	
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

  pathwayListData.out = pathwayListData() 
 enrichmentPlot(pathwayListData.out, 25  )

  enrichmentNetwork(pathwayListData.out )

#  enrichmentNetworkPlotly(pathwayListData.out)

   PGSEAplot() # pathway analysis using PGSEA

## 
## Computing P values using ANOVA

9. Chromosome

 input_selectContrast2 <- 'IR-mock_for_wt'	#select Comparison 
 #input_selectContrast2 = limma.out$comparisons[3] # manually set
 input_limmaPvalViz <- 0.1	#FDR to filter genes
 input_limmaFCViz <- 2	#FDR to filter genes 
# genomePlotly() # shows fold-changes on the genome

10. Biclustering

 input_nGenesBiclust <- 1000	#Top genes for biclustering
 input_biclustMethod <- 'BCCC()'	#Method: 'BCCC', 'QUBIC', 'runibic' ... 
 biclustering.out = biclustering()  # run analysis

## Warning: package 'biclust' was built under R version 3.5.1

 input_selectBicluster <- 1	#select a cluster 
 biclustHeatmap()   # heatmap for selected cluster

 input_selectGO4 <- 'GOBP'	#Gene set category 
 # Read pathway data again 
 GeneSets.out <-readGeneSets( geneSetFile,
    convertedData.out, input_selectGO4,input_selectOrg,
    c(input_minSetSize, input_maxSetSize)  )  
 results <- geneListBclustGO()  #Enrichment analysis for k-Means clusters	
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

adj.Pval	Genes	Pathways
3.1e-124	207	Immune system process
1.5e-89	157	Organonitrogen compound metabolic process
4.7e-86	148	Positive regulation of molecular function
5.4e-86	155	Cell death
2.0e-83	139	Homeostatic process
3.2e-81	136	Macromolecular complex assembly
3.5e-80	146	Programmed cell death
2.9e-79	144	Apoptotic process
1.1e-75	132	Regulation of cell death
2.2e-75	117	Cell-cell adhesion

11. Co-expression network

 input_mySoftPower <- 5	#SoftPower to cutoff
 input_nGenesNetwork <- 1000	#Number of top genes
 input_minModuleSize <- 20	#Module size minimum 
 wgcna.out = wgcna()   # run WGCNA

## ==========================================================================
## *
## *  Package WGCNA 1.63 loaded.
## *
## *    Important note: It appears that your system supports multi-threading,
## *    but it is not enabled within WGCNA in R. 
## *    To allow multi-threading within WGCNA with all available cores, use 
## *
## *          allowWGCNAThreads()
## *
## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
## *    Alternatively, set the following environment variable on your system:
## *
## *          ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## *    for example 
## *
## *          ALLOW_WGCNA_THREADS=8
## *
## *    To set the environment variable in linux bash shell, type 
## *
## *           export ALLOW_WGCNA_THREADS=8
## *
## *     before running R. Other operating systems or shells will
## *     have a similar command to achieve the same aim.
## *
## ==========================================================================
## 
## 
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.9720  2.7600          0.966   572.0     612.0    711
## 2      2   0.9490  1.2200          0.956   412.0     449.0    578
## 3      3   0.9050  0.7070          0.923   325.0     353.0    495
## 4      4   0.7480  0.4190          0.797   269.0     287.0    439
## 5      5   0.5350  0.2490          0.615   230.0     239.0    398
## 6      6   0.1820  0.1090          0.255   200.0     204.0    367
## 7      7   0.0040  0.0131          0.185   178.0     175.0    342
## 8      8   0.0965 -0.0738          0.144   160.0     152.0    321
## 9      9   0.2500 -0.1240          0.238   145.0     133.0    303
## 10    10   0.4570 -0.1930          0.431   132.0     118.0    287
## 11    12   0.5670 -0.2940          0.514   113.0      98.9    263
## 12    14   0.6340 -0.3830          0.587    97.8      84.7    243
## 13    16   0.6890 -0.4420          0.655    86.2      73.5    227
## 14    18   0.7120 -0.4930          0.673    76.9      63.8    213
## 15    20   0.7290 -0.5420          0.683    69.3      55.7    201
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.

 softPower()  # soft power curve

  modulePlot()  # plot modules

  listWGCNA.Modules.out = listWGCNA.Modules() #modules

 input_selectGO5 <- 'GOBP'	#Gene set category 
 # Read pathway data again 
 GeneSets.out <-readGeneSets( geneSetFile,
    convertedData.out, input_selectGO5,input_selectOrg,
    c(input_minSetSize, input_maxSetSize)  ) 
 input_selectWGCNA.Module <- '1. turquoise (495 genes)'	#Select a module
 input_topGenesNetwork <- 10	#SoftPower to cutoff
 input_edgeThreshold <- 0.4	#Number of top genes 
 moduleNetwork()	# show network of top genes in selected module

##  softConnectivity: FYI: connecitivty of genes with less than 4 valid samples will be returned as NA.
##  ..calculating connectivities..

 input_removeRedudantSets <- TRUE	#Remove redundant gene sets 
 results <- networkModuleGO()  #Enrichment analysis of selected module
 results$adj.Pval <- format( results$adj.Pval,digits=3 )
 kable( results, row.names=FALSE)

adj.Pval	Genes	Pathways
6.2e-68	106	Organonitrogen compound metabolic process
3.0e-62	61	Ribonucleoprotein complex biogenesis
6.9e-62	68	RNA processing
4.0e-61	84	Organonitrogen compound biosynthetic process
2.4e-56	57	NcRNA metabolic process
4.2e-53	101	Immune system process
2.0e-49	81	Macromolecular complex assembly
3.0e-48	46	Ribosome biogenesis
3.4e-48	84	Establishment of protein localization
4.7e-48	63	Cellular amide metabolic process

iDEP