// Boost.Bimap // // Copyright (c) 2006-2007 Matias Capeletto // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) /***************************************************************************** Boost.MultiIndex *****************************************************************************/ #include //[ code_mi_to_b_path_mi_hashed_indices #include #include #include #include #include #include #include #include using namespace boost::multi_index; namespace bl = boost::lambda; // word_counter keeps the ocurrences of words inserted. A hashed // index allows for fast checking of preexisting entries. struct word_counter_entry { std::string word; unsigned int occurrences; word_counter_entry( std::string word_ ) : word(word_), occurrences(0) {} }; typedef multi_index_container < word_counter_entry, indexed_by < ordered_non_unique < BOOST_MULTI_INDEX_MEMBER( word_counter_entry,unsigned int,occurrences), std::greater >, hashed_unique < BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word) > > > word_counter; typedef boost::tokenizer > text_tokenizer; int main() { std::string text= "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... " "...snip..." "...no se salga un punto de la verdad."; // feed the text into the container word_counter wc; text_tokenizer tok(text,boost::char_separator(" \t\n.,;:!?'\"-")); unsigned int total_occurrences = 0; for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end(); it != it_end ; ++it ) { ++total_occurrences; word_counter::iterator wit = wc.insert(*it).first; wc.modify_key( wit, ++ bl::_1 ); } // list words by frequency of appearance std::cout << std::fixed << std::setprecision(2); for( word_counter::iterator wit = wc.begin(), wit_end=wc.end(); wit != wit_end; ++wit ) { std::cout << std::setw(11) << wit->word << ": " << std::setw(5) << 100.0 * wit->occurrences / total_occurrences << "%" << std::endl; } return 0; } //]