1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
#include <iostream>
#include <string>
#include <map>
#include <algorithm>
#include <cctype>
std::map< std::string, std::string > make_map()
{
return {
{ "LOL", "laughing out loud" },
{ "BFN", "bye for now" },
{ "FTW", "for the win" },
// ...
} ;
}
// simple edit distance (does not look for transpositions)
// https://en.wikipedia.org/wiki/Edit_distance (brute force implementation)
std::size_t levenshtein_distance( const std::string& a, const std::string& b )
{
if( a.empty() ) return b.empty() ? 0 : b.size() ;
if( b.empty() ) return a.size() ;
std::string as { a.begin(), a.end()-1 } ;
std::string bs { b.begin(), b.end()-1 } ;
return std::min( {
levenshtein_distance( as, b ) + 1,
levenshtein_distance( a, bs ) + 1,
levenshtein_distance( as, bs ) + ( a.back() != b.back() )
} ) ;
}
std::pair< std::string, std::string > split_punct( const std::string& str )
{
auto iter = str.rbegin() ;
while( iter != str.rend() && std::ispunct(*iter) ) ++iter ;
return { { str.begin(), iter.base() }, { iter.base(), str.end() } } ;
}
std::string decode( const std::map< std::string, std::string >& map, const std::string& str )
{
auto str_punct = split_punct(str) ;
for( char& c : str_punct.first ) c = std::toupper(c) ;
auto iter = map.find(str_punct.first) ;
if( iter != map.end() ) return iter->second + str_punct.second ;
std::size_t min_distance = 3 ; // max distance of two for a valid suggestion
std::string decoded = str ;
for( const auto& pair : map )
{
const auto distance = levenshtein_distance( pair.first, str_punct.first ) ;
if( distance < min_distance ) // if this is the closest match so far
{
min_distance = distance ;
decoded = "did you mean '" + pair.first + "'? => " + pair.second + str_punct.second ;
}
}
return decoded ;
}
int main()
{
const auto map = make_map() ;
for( std::string maybe_leet : { "LOL", "lola!", "LPL", "llo", "not-leet", "Bfn", "nbf...", "TWF!!" } )
std::cout << maybe_leet << " => " << decode( map, maybe_leet ) << "\n\n" ;
}
|