# ------------------------------------------------------------------------------
# Load padlocdev.
library(padlocdev)
# ------------------------------------------------------------------------------
# Locate every component of the padloc database. `path_db` is a placeholder:
# point it at the root of the database before running the script.
path_db <- "/path/to/padloc-db/"
# Locations handed to the multi_read_*() readers below (presumably directories
# holding the profile HMMs and the system models -- confirm).
path_hmm <- fs::path(path_db, "hmm")
path_sys <- fs::path(path_db, "sys")
# Metadata and grouping tables, resolved relative to the database root.
path_hmm_meta <- fs::path(path_db, "hmm_meta.txt")
path_sys_meta <- fs::path(path_db, "sys_meta.txt")
path_sys_groups <- fs::path(path_db, "sys_groups.txt")
# ------------------------------------------------------------------------------
# Load each database component with its padlocdev reader. The resulting
# objects (hmms, models, hmm_meta, sys_meta, sys_groups) feed every check that
# follows in this script.
hmms <- multi_read_hmm_header(path_hmm)
models <- multi_read_padloc_model(path_sys)
hmm_meta <- read_hmm_meta(path_hmm_meta)
sys_meta <- read_sys_meta(path_sys_meta)
sys_groups <- read_sys_groups(path_sys_groups)
# ------------------------------------------------------------------------------
# Verify that HMM file names match their accession. The result is stored and
# then evaluated by its bare name so that it is printed for inspection when the
# script is stepped through at the top level.
name_verification <- verify_hmm_names(hmms)
name_verification
# ------------------------------------------------------------------------------
# Compare the profile HMMs (hmms) with the HMM metadata table (hmm_meta), then
# print the comparison for inspection.
hmm_files_hmm_meta_comparison <- compare_hmm_files_hmm_meta(hmms, hmm_meta)
hmm_files_hmm_meta_comparison
# ------------------------------------------------------------------------------
# Compare the system models (models) with the system metadata table
# (sys_meta), then print the comparison for inspection.
sys_files_sys_meta_comparison <- compare_sys_files_sys_meta(models, sys_meta)
sys_files_sys_meta_comparison
# ------------------------------------------------------------------------------
# Compare the system metadata table (sys_meta) with the system groups table
# (sys_groups), then print the comparison for inspection.
sys_meta_sys_comparison <- compare_sys_meta_sys_groups(sys_meta, sys_groups)
sys_meta_sys_comparison
# ------------------------------------------------------------------------------
# Prepare the expanded inputs used by the remaining checks: the HMM metadata
# table is expanded to account for HMMs with multiple name assignments, and the
# system models are expanded (using that expanded table) to account for groups
# of genes.
hmm_meta_expanded <- expand_hmm_meta(hmm_meta)
models_expanded <- expand_gene_groups_all(models, hmm_meta_expanded)
# ------------------------------------------------------------------------------
# Cross-check the expanded system models against the expanded HMM metadata
# table; the bare name on the last line prints the result for inspection.
sys_files_hmm_meta_comparison <- compare_sys_files_hmm_meta(
  models_expanded,
  hmm_meta_expanded
)
sys_files_hmm_meta_comparison
# ------------------------------------------------------------------------------
# Build a validity report for the expanded system models and pass it to
# why_invalid() so it can be inspected. Every model should be valid: where one
# is not, update the model, then re-read and re-validate to confirm the fix.
validity_report <- report_padloc_model_validity(models_expanded)
why_invalid(validity_report)
# ------------------------------------------------------------------------------
# Work out how many HMMs are shared between groups, then print the result for
# inspection.
group_overlaping <- group_overlap(
  models_expanded,
  sys_groups,
  hmm_meta_expanded,
  sys_meta
)
group_overlaping
# ------------------------------------------------------------------------------
# Last step -- run it only once all of the checks above are in order: divide
# the database into sub-groups for testing. Both paths below are placeholders
# to be replaced before running.
# NOTE(review): `ori_db` presumably refers to the same database as `path_db`
# at the top of this script -- confirm before running.
ori_db <- "/path/to/original/unsplit/database"
new_db <- "/path/for/split/database/creation"
divide_database(
  models_expanded,
  sys_groups,
  hmm_meta_expanded,
  sys_meta,
  ori_db,
  new_db
)