# -----------------------------------------------------------------------------
# ISMB'18 Tutorial
#
# CASMAP: Example 1, combinatorial motif binding
# -----------------------------------------------------------------------------

# Import CASMAP package
library(CASMAP)

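# Paths to the input files (genotype and phenotype; no covariate file is used in this example)
# Note: for simplicity, the paths are relative, so the data files are assumed to be in the
#       current working directory (i.e. run the script from the directory that holds the data files)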

# Input files, given as paths relative to the current working directory
data_file <- "breast_cancer_matrix.dat"
label_file <- "breast_cancer_label.dat"

# Build the CASMAP object in higher-order interaction search mode
obj_hoi <- CASMAP(mode = "higherOrderEpistasis")

# Analysis hyperparameters:
#   alpha         - target Family-Wise Error Rate (FWER)
#   max_comb_size - maximum number of interacting variants
#                   (max_comb_size = 0 means unlimited order)
obj_hoi$setTargetFWER(alpha = 0.05)
obj_hoi$setMaxCombinationSize(max_comb_size = 0)

# Show the object's state before any data has been read
print(obj_hoi)

# Load the data. The arguments are passed by keyword, which is the recommended
# way to call readFiles() since it does not depend on the positional order.
obj_hoi$readFiles(genotype_file = data_file, phenotype_file = label_file)

# Show the object again, now after the input files have been read
# (no covariate file is passed in this example; the covariate is optional)
print(obj_hoi)

# Run the search
obj_hoi$execute()

# The search is done: fetch the summary of the results and display it
summary_results <- obj_hoi$getSummary()
print(summary_results)

# Get the statistically significant sets of motifs and list the available
# columns. print() is used explicitly so the output is also shown when the
# script is run via source(), which does not auto-print top-level values.
sig_sets <- obj_hoi$getSignificantInteractions()
print(names(sig_sets))

# Order the sets by increasing p-value (done once; reused for both displays).
sort_idx <- order(sig_sets$pvalue)
sig_sets_ordered <- sig_sets[sort_idx, ]
report_cols <- c("pvalue", "itemsets")

# Display the most significant sets.
print(head(sig_sets_ordered[, report_cols], n = 5))

# Display the borderline significant sets.
print(tail(sig_sets_ordered[, report_cols], n = 5))

