# -----------------------------------------------------------------------------
# ISMB'18 Tutorial
#
# CASMAP: Example 3, higher-order epistasis
# -----------------------------------------------------------------------------

library(CASMAP)

# Paths to the three input files used by this example: genotype (X),
# phenotype (Y) and covariate (C). They are given as bare file names, i.e.
# relative paths (presumably resolved against the working directory -- confirm).
# Note that X.dat contains a subsampled set of SNPs.
genotype_file  <- "X.dat"
phenotype_file <- "Y.dat"
covariate_file <- "C.dat"

# Instantiate the CASMAP analysis object in higher-order epistasis mode, i.e.
# a search for interactions between genomic variants.
obj_hoi <- CASMAP(mode = "higherOrderEpistasis")

# Hyperparameters of the analysis:
#   alpha          Target Family-Wise Error Rate (FWER).
#   max_comb_size  Maximum number of interacting variants. For example, a value
#                  of 4 restricts the search to sets of up to 4 SNPs
#                  (inclusive); 0 (the default) allows sets of arbitrary size.
obj_hoi$setTargetFWER(alpha = 0.05)
obj_hoi$setMaxCombinationSize(max_comb_size = 0)

# Load the input data.
# The _A. thaliana_ genotypes analyzed here are homozygous (the original data
# are binary). See Example 2 for details on how to encode the variants if this
# is not the case.
obj_hoi$readFiles(
  genotype_file = genotype_file,
  phenotype_file = phenotype_file,
  covariate_file = covariate_file
)

# Run the algorithm to find statistically associated interactions between
# genomic variants.
# This execution will take a bit longer than the other two examples.
obj_hoi$execute()

# The analysis is finished. Get the summary results and show them.
summary_results <- obj_hoi$getSummary()
print(summary_results)

# Get the statistically significant sets of SNPs and inspect their structure.
sig_sets <- obj_hoi$getSignificantInteractions()
str(sig_sets)

# Display the first hits. The explicit print() keeps this output visible when
# the script is run through source(), which does not auto-print top-level
# values by default; it also matches the print() used for the summary above.
print(head(sig_sets[, c("pvalue", "itemsets")]))
     
# Save all results to output files.
# Make sure the target directory exists first: the write calls below are not
# assumed to create missing directories, so without this the results could be
# lost on a fresh checkout.
output_dir <- "output"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# file.path() joins with "/", so the resulting paths are the same
# "output/<name>.txt" locations as before.
obj_hoi$writeSummary(file.path(output_dir, "summary.txt"))
obj_hoi$writeProfile(file.path(output_dir, "profiling.txt"))
obj_hoi$writeSignificantInteractions(
  file.path(output_dir, "significant_interactions.txt")
)

