Package org.forester.surfacing
Class SurfacingUtil
java.lang.Object
org.forester.surfacing.SurfacingUtil
-
Field Summary
-
Method Summary
Modifier and Type | Method | Description
static void
addAllBinaryDomainCombinationToSet
(GenomeWideCombinableDomains genome, SortedSet<BinaryDomainCombination> binary_domain_combinations) static void
addAllDomainIdsToSet
(GenomeWideCombinableDomains genome, SortedSet<String> domain_ids) static DescriptiveStatistics
calculateDescriptiveStatisticsForMeanValues
(Set<DomainSimilarity> similarities) static void
checkForOutputFileWriteability
(File outfile) static void
checkWriteabilityForPairwiseComparisons
(DomainSimilarity.PRINT_OPTION domain_similarity_print_option, String[][] input_file_properties, String automated_pairwise_comparison_suffix, File outdir) static void
collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile
(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, BinaryDomainCombination.DomainCombinationType dc_type, List<BinaryDomainCombination> all_binary_domains_combination_gained, boolean get_gains) createDomainIdToGoIdMap
(List<PfamToGoMapping> pfam_to_go_mappings) createDomainIdToSecondaryFeaturesMap
(File secondary_features_map_file) static Phylogeny
createNjTreeBasedOnMatrixToFile
(File nj_tree_outfile, DistanceMatrix distance) static StringBuilder
createParametersAsString
(boolean ignore_dufs, double ie_value_max, double fs_e_value_max, int max_allowed_overlap, boolean no_engulfing_overlaps, File cutoff_scores_file, BinaryDomainCombination.DomainCombinationType dc_type) static void
static void
decoratePrintableDomainSimilarities
(SortedSet<DomainSimilarity> domain_similarities, DomainSimilarityCalculator.Detailedness detailedness) static void
doit
(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species, Map<String, List<Integer>> average_protein_lengths_by_dc) static void
domainsPerProteinsStatistics
(String genome, List<Protein> protein_list, DescriptiveStatistics all_genomes_domains_per_potein_stats, SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo, SortedSet<String> domains_which_are_always_single, SortedSet<String> domains_which_are_sometimes_single_sometimes_not, SortedSet<String> domains_which_never_single, Writer writer) static void
executeDomainLengthAnalysis
(String[][] input_file_properties, int number_of_genomes, DomainLengthsTable domain_lengths_table, File outfile) static void
executeFitchGainsAnalysis
(File output_file, List<BinaryDomainCombination> all_bin_domain_combinations_changed, int sum_of_all_domains_encountered, SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered, boolean is_gains_analysis) Warning: This method modifies 'all_bin_domain_combinations_encountered' as a side effect! static void
executeParsimonyAnalysis
(long random_number_seed_for_fitch_parsimony, boolean radomize_fitch_parsimony, String outfile_name, DomainParsimonyCalculator domain_parsimony, Phylogeny phylogeny, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, String parameters_str, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> positive_filter, boolean output_binary_domain_combinations_for_graphs, List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch, List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch, BinaryDomainCombination.DomainCombinationType dc_type, Map<String, DescriptiveStatistics> protein_length_stats_by_dc, Map<String, DescriptiveStatistics> domain_number_stats_by_dc, Map<String, DescriptiveStatistics> domain_length_stats_by_domain, Map<String, Integer> tax_code_to_id_map, boolean write_to_nexus, boolean use_last_in_fitch_parsimony, boolean perform_dc_fich) static void
executeParsimonyAnalysisForSecondaryFeatures
(String outfile_name, DomainParsimonyCalculator secondary_features_parsimony, Phylogeny phylogeny, String parameters_str, Map<Species, MappingResults> mapping_results_map, boolean use_last_in_fitch_parsimony) static void
executePlusMinusAnalysis
(File output_file, List<String> plus_minus_analysis_high_copy_base, List<String> plus_minus_analysis_high_copy_target, List<String> plus_minus_analysis_low_copy, List<GenomeWideCombinableDomains> gwcd_list, SortedMap<Species, List<Protein>> protein_lists_per_species, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, List<Object> plus_minus_analysis_numbers) static void
extractProteinNames
(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species) static void
extractProteinNames
(SortedMap<Species, List<Protein>> protein_lists_per_species, String domain_id, Writer out, String separator, String limit_to_species, double domain_e_cutoff) getAllDomainIds
(List<GenomeWideCombinableDomains> gwcd_list) getDomainCounts
(List<Protein> protein_domain_collections) static int
static void
static Phylogeny[]
obtainAndPreProcessIntrees
(File[] intree_files, int number_of_genomes, String[][] input_file_properties) static Phylogeny
obtainFirstIntree
(File intree_file) static String
obtainHexColorStringDependingOnTaxonomyGroup
(String tax_code, Phylogeny phy) static String
obtainTaxonomyGroup
(String tax_code, Phylogeny species_tree) static void
performDomainArchitectureAnalysis
(SortedMap<String, Set<String>> domain_architecutures, SortedMap<String, Integer> domain_architecuture_counts, int min_count, File da_counts_outfile, File unique_da_outfile) static void
preparePhylogeny
(Phylogeny p, DomainParsimonyCalculator domain_parsimony, String date_time, String method, String name, String parameters_str) static void
preparePhylogenyForParsimonyAnalyses
(Phylogeny intree, String[][] input_file_properties) static void
printOutPercentageOfMultidomainProteins
(SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo, Writer log_writer) static void
processFilter
(File filter_file, SortedSet<String> filter) static String[][]
processInputGenomesFile
(File input_genomes) static void
processPlusMinusAnalysisOption
(CommandLineArguments cla, List<String> high_copy_base, List<String> high_copy_target, List<String> low_copy, List<Object> numbers) static void
processPlusMinusFile
(File plus_minus_file, List<String> high_copy_base, List<String> high_copy_target, List<String> low_copy, List<Object> numbers) static StringBuffer
proteinToDomainCombinations
(Protein protein, String protein_id, String separator) static int
storeDomainArchitectures
(String genome, SortedMap<String, Set<String>> domain_architecutures, List<Protein> protein_list, Map<String, Integer> distinct_domain_architecuture_counts) static void
writeAllDomainsChangedOnAllSubtrees
(Phylogeny p, boolean get_gains, String outdir, String suffix_for_filename) static void
writeBinaryDomainCombinationsFileForGraphAnalysis
(String[][] input_file_properties, File output_dir, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order) static void
writeBinaryStatesMatrixAsListToFile
(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, Map<String, String> descriptions) static void
writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis
(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, BinaryDomainCombination.OutputFormat bc_output_format) static void
writeBinaryStatesMatrixToList
(Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, boolean domain_combinations, CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, String title_for_html, String prefix_for_html, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> all_pfams_encountered, SortedSet<String> pfams_gained_or_lost, String suffix_for_per_node_events_file, Map<String, Integer> tax_code_to_id_map) static void
writeDomainCombinationsCountsFile
(String[][] input_file_properties, File output_dir, Writer per_genome_domain_promiscuity_statistics_writer, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order) static void
writeDomainSimilaritiesToFile
(StringBuilder html_desc, StringBuilder html_title, Writer simple_tab_writer, Writer single_writer, Map<Character, Writer> split_writers, SortedSet<DomainSimilarity> similarities, boolean treat_as_binary, List<Species> species_order, DomainSimilarity.PRINT_OPTION print_option, DomainSimilarity.DomainSimilarityScoring scoring, boolean verbose, Map<String, Integer> tax_code_to_id_map, Phylogeny phy, Set<String> pos_filter_doms) static void
writeHtmlHead
(Writer w, String title) static void
writeMatrixToFile
(File matrix_outfile, List<DistanceMatrix> matrices) static void
writeMatrixToFile
(CharacterStateMatrix<?> matrix, String filename, CharacterStateMatrix.Format format) static void
writePhylogenyToFile
(Phylogeny phylogeny, String filename) static void
writePresentToNexus
(File output_file, File positive_filter_file, SortedSet<String> filter, List<GenomeWideCombinableDomains> gwcd_list) static void
writeProteinListsForAllSpecies
(File output_dir, SortedMap<Species, List<Protein>> protein_lists_per_species, List<GenomeWideCombinableDomains> gwcd_list, double domain_e_cutoff, Set<String> pos_filter_doms) static void
-
Field Details
-
PATTERN_SP_STYLE_TAXONOMY
-
-
Method Details
-
addAllBinaryDomainCombinationToSet
public static void addAllBinaryDomainCombinationToSet(GenomeWideCombinableDomains genome, SortedSet<BinaryDomainCombination> binary_domain_combinations) -
addAllDomainIdsToSet
public static void addAllDomainIdsToSet(GenomeWideCombinableDomains genome, SortedSet<String> domain_ids) -
calculateDescriptiveStatisticsForMeanValues
public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues(Set<DomainSimilarity> similarities) -
checkForOutputFileWriteability
-
checkWriteabilityForPairwiseComparisons
public static void checkWriteabilityForPairwiseComparisons(DomainSimilarity.PRINT_OPTION domain_similarity_print_option, String[][] input_file_properties, String automated_pairwise_comparison_suffix, File outdir) -
collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile
public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, BinaryDomainCombination.DomainCombinationType dc_type, List<BinaryDomainCombination> all_binary_domains_combination_gained, boolean get_gains) -
createDomainIdToGoIdMap
-
createDomainIdToSecondaryFeaturesMap
public static Map<String,Set<String>> createDomainIdToSecondaryFeaturesMap(File secondary_features_map_file) throws IOException - Throws:
IOException
-
createNjTreeBasedOnMatrixToFile
public static Phylogeny createNjTreeBasedOnMatrixToFile(File nj_tree_outfile, DistanceMatrix distance) -
createParametersAsString
public static StringBuilder createParametersAsString(boolean ignore_dufs, double ie_value_max, double fs_e_value_max, int max_allowed_overlap, boolean no_engulfing_overlaps, File cutoff_scores_file, BinaryDomainCombination.DomainCombinationType dc_type) -
createSplitWriters
public static void createSplitWriters(File out_dir, String my_outfile, Map<Character, Writer> split_writers) throws IOException - Throws:
IOException
-
createTaxCodeToIdMap
-
decoratePrintableDomainSimilarities
public static void decoratePrintableDomainSimilarities(SortedSet<DomainSimilarity> domain_similarities, DomainSimilarityCalculator.Detailedness detailedness) -
doit
public static void doit(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species, Map<String, List<Integer>> average_protein_lengths_by_dc) throws IOException - Throws:
IOException
-
domainsPerProteinsStatistics
public static void domainsPerProteinsStatistics(String genome, List<Protein> protein_list, DescriptiveStatistics all_genomes_domains_per_potein_stats, SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo, SortedSet<String> domains_which_are_always_single, SortedSet<String> domains_which_are_sometimes_single_sometimes_not, SortedSet<String> domains_which_never_single, Writer writer) -
executeDomainLengthAnalysis
public static void executeDomainLengthAnalysis(String[][] input_file_properties, int number_of_genomes, DomainLengthsTable domain_lengths_table, File outfile) throws IOException - Throws:
IOException
-
executeFitchGainsAnalysis
public static void executeFitchGainsAnalysis(File output_file, List<BinaryDomainCombination> all_bin_domain_combinations_changed, int sum_of_all_domains_encountered, SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered, boolean is_gains_analysis) throws IOException Warning: This method modifies 'all_bin_domain_combinations_encountered' as a side effect! - Parameters:
output_file
-all_bin_domain_combinations_changed
-sum_of_all_domains_encountered
-all_bin_domain_combinations_encountered
-is_gains_analysis
-protein_length_stats_by_dc
-- Throws:
IOException
-
executeParsimonyAnalysis
public static void executeParsimonyAnalysis(long random_number_seed_for_fitch_parsimony, boolean radomize_fitch_parsimony, String outfile_name, DomainParsimonyCalculator domain_parsimony, Phylogeny phylogeny, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, String parameters_str, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> positive_filter, boolean output_binary_domain_combinations_for_graphs, List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch, List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch, BinaryDomainCombination.DomainCombinationType dc_type, Map<String, DescriptiveStatistics> protein_length_stats_by_dc, Map<String, DescriptiveStatistics> domain_number_stats_by_dc, Map<String, DescriptiveStatistics> domain_length_stats_by_domain, Map<String, Integer> tax_code_to_id_map, boolean write_to_nexus, boolean use_last_in_fitch_parsimony, boolean perform_dc_fich) - Parameters:
all_binary_domains_combination_lost_fitch
-use_last_in_fitch_parsimony
-perform_dc_fich
-consider_directedness_and_adjacency_for_bin_combinations
-all_binary_domains_combination_gained
- if null ignored, otherwise this is to list all binary domain combinations which were gained under unweighted (Fitch) parsimony.
-
executeParsimonyAnalysisForSecondaryFeatures
public static void executeParsimonyAnalysisForSecondaryFeatures(String outfile_name, DomainParsimonyCalculator secondary_features_parsimony, Phylogeny phylogeny, String parameters_str, Map<Species, MappingResults> mapping_results_map, boolean use_last_in_fitch_parsimony) -
executePlusMinusAnalysis
public static void executePlusMinusAnalysis(File output_file, List<String> plus_minus_analysis_high_copy_base, List<String> plus_minus_analysis_high_copy_target, List<String> plus_minus_analysis_low_copy, List<GenomeWideCombinableDomains> gwcd_list, SortedMap<Species, List<Protein>> protein_lists_per_species, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, List<Object> plus_minus_analysis_numbers) -
extractProteinNames
public static void extractProteinNames(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species) throws IOException - Throws:
IOException
-
extractProteinNames
public static void extractProteinNames(SortedMap<Species, List<Protein>> protein_lists_per_species, String domain_id, Writer out, String separator, String limit_to_species, double domain_e_cutoff) throws IOException - Throws:
IOException
-
getAllDomainIds
-
getDomainCounts
-
getNumberOfNodesLackingName
-
log
-
obtainAndPreProcessIntrees
-
obtainFirstIntree
-
obtainHexColorStringDependingOnTaxonomyGroup
public static String obtainHexColorStringDependingOnTaxonomyGroup(String tax_code, Phylogeny phy) throws IllegalArgumentException - Throws:
IllegalArgumentException
-
obtainTaxonomyGroup
public static String obtainTaxonomyGroup(String tax_code, Phylogeny species_tree) throws IllegalArgumentException - Throws:
IllegalArgumentException
-
performDomainArchitectureAnalysis
-
preparePhylogeny
-
preparePhylogenyForParsimonyAnalyses
-
printOutPercentageOfMultidomainProteins
-
processFilter
-
processInputGenomesFile
-
processPlusMinusAnalysisOption
-
processPlusMinusFile
-
proteinToDomainCombinations
public static StringBuffer proteinToDomainCombinations(Protein protein, String protein_id, String separator) -
sortDomainsWithAscendingConfidenceValues
-
storeDomainArchitectures
-
writeAllDomainsChangedOnAllSubtrees
public static void writeAllDomainsChangedOnAllSubtrees(Phylogeny p, boolean get_gains, String outdir, String suffix_for_filename) throws IOException - Throws:
IOException
-
writeBinaryDomainCombinationsFileForGraphAnalysis
public static void writeBinaryDomainCombinationsFileForGraphAnalysis(String[][] input_file_properties, File output_dir, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order) -
writeBinaryStatesMatrixAsListToFile
public static void writeBinaryStatesMatrixAsListToFile(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, Map<String, String> descriptions) -
writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis
public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, BinaryDomainCombination.OutputFormat bc_output_format) -
writeBinaryStatesMatrixToList
public static void writeBinaryStatesMatrixToList(Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, boolean domain_combinations, CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, String title_for_html, String prefix_for_html, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> all_pfams_encountered, SortedSet<String> pfams_gained_or_lost, String suffix_for_per_node_events_file, Map<String, Integer> tax_code_to_id_map) -
writeDomainCombinationsCountsFile
public static void writeDomainCombinationsCountsFile(String[][] input_file_properties, File output_dir, Writer per_genome_domain_promiscuity_statistics_writer, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order) -
writeDomainSimilaritiesToFile
public static void writeDomainSimilaritiesToFile(StringBuilder html_desc, StringBuilder html_title, Writer simple_tab_writer, Writer single_writer, Map<Character, Writer> split_writers, SortedSet<DomainSimilarity> similarities, boolean treat_as_binary, List<Species> species_order, DomainSimilarity.PRINT_OPTION print_option, DomainSimilarity.DomainSimilarityScoring scoring, boolean verbose, Map<String, Integer> tax_code_to_id_map, Phylogeny phy, Set<String> pos_filter_doms) throws IOException - Throws:
IOException
-
writeHtmlHead
- Throws:
IOException
-
writeMatrixToFile
public static void writeMatrixToFile(CharacterStateMatrix<?> matrix, String filename, CharacterStateMatrix.Format format) -
writeMatrixToFile
-
writePhylogenyToFile
-
writePresentToNexus
-
writeProteinListsForAllSpecies
-
writeTaxonomyLinks
public static void writeTaxonomyLinks(Writer writer, String species, Map<String, Integer> tax_code_to_id_map) throws IOException - Throws:
IOException
-