// Reads the last 500 lines of FILENAME, then looks for a column whose value is
// NUMBER in one row and UNITS in the row below it, and prints that column's
// value from the row after that (or "(not found)").
#include <algorithm>
#include <ctime>
#include <deque>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>
//----------------------------------------------------------------------------
// JLBorges's Circular Buffer for the last N lines of a file
// Modified to store lines in a std::deque<> instead of a std::vector<>
//
/// Retains the most recently pushed elements, newest first.
///
/// Trimming is lazy: the underlying deque is allowed to grow to
/// max(sz, MaxCacheSize) elements before push() cuts it back to `sz`, so
/// between trims `buffer` may hold more than `sz` elements. move_lines()
/// always trims to at most `sz` before handing the elements out.
///
/// @tparam ElementType   type of the stored elements (must be default
///                       constructible, as required by std::deque::resize)
/// @tparam MaxCacheSize  minimum number of elements buffered before a trim
template <typename ElementType, std::size_t MaxCacheSize = 1000>
struct circular_buffer
{
  /// @param buffsz  number of most-recent elements to retain
  explicit circular_buffer( std::size_t buffsz = 500 ) : sz(buffsz) {}

  /// Stores `line` as the newest element, trimming old elements if the
  /// buffer has outgrown its threshold.
  void push( ElementType&& line )
  {
    buffer.push_front( std::move(line) ) ; // newest first; move_lines() walks rbegin()..rend()

    // Computed from *this* instance's sz on every call. A function-local
    // static here would be initialised only once per template instantiation,
    // so a later instance with a larger sz would see size() > threshold while
    // size() < sz, and resize(sz) would then *grow* the deque with
    // default-constructed elements.
    const std::size_t max_cache_size = std::max( sz, MaxCacheSize ) ;
    if( buffer.size() > max_cache_size ) buffer.resize(sz) ;
  }

  /// Moves the retained elements (at most `sz`) into a new Container, ordered
  /// oldest to newest. The elements left behind in `buffer` are moved-from.
  ///
  /// @tparam Container  container constructible from an iterator range
  template <typename Container>
  Container move_lines()
  {
    if( buffer.size() > sz ) buffer.resize(sz) ;
    // http://en.cppreference.com/w/cpp/iterator/move_iterator
    return { std::make_move_iterator( buffer.rbegin() ), std::make_move_iterator( buffer.rend() ) } ;
  }

  std::size_t sz ;                  // number of elements to retain
  std::deque<ElementType> buffer ;  // newest element at the front
};
template <typename Container>
Container get_last_n_lines( std::ifstream file, std::size_t n )
{
circular_buffer<std::string> buffer(n) ;
std::string line ;
while( std::getline( file, line ) ) buffer.push( std::move(line) ) ;
return buffer.move_lines<Container>() ;
}
//----------------------------------------------------------------------------
// A parsed-into-columns line AND its 'number' for memoization.
//
/// One input line split into columns, tagged with the index of the line it
/// came from so that parsed lines can be looked up again later.
struct record
{
  using items_type = std::vector <std::string>;

  std::size_t line_number = 0;  // index of the source line; 0 for a default-constructed record
  items_type items;             // the line's columns

  record() = default;

  /// @param line_number  index of the source line
  /// @param items        the line's columns; taken by value so that an rvalue
  ///                     argument is moved in rather than copied (std::move on
  ///                     a const reference would silently copy)
  record( std::size_t line_number, items_type items ):
    line_number( line_number ),
    items( std::move( items ) )
  { }
};
//----------------------------------------------------------------------------
// Split the line into whitespace-delineated columns
//
/// Splits `line` into its whitespace-separated columns.
///
/// @param line  text to split (named `line` rather than `record` so that it
///              does not shadow the `record` type inside the function)
/// @return      the columns in order; empty if `line` has no non-whitespace text
record::items_type split( const std::string& line )
{
  std::istringstream ss( line );
  record::items_type columns;

  // Extract into a local and move it into the result. (A move_iterator over
  // std::istream_iterator cannot move: dereferencing that iterator yields a
  // const reference, so each token would be copied.)
  for (std::string token; ss >> token; )
    columns.push_back( std::move( token ) );

  return columns;
}
//----------------------------------------------------------------------------
template <typename Container>
std::string find_match( const std::string& s1, const std::string& s2, const Container& lines )
{
// There must be at least three rows
if (lines.size() < 3) return "";
// Here we memoize the parsed records
circular_buffer <record, 10> records( 2 );
// This function looks up an already-parsed record.
// If not found, it parses the line and adds it to the memoized list of records.
auto get_record = [ &records ]( std::size_t line_number, const std::string& line )
{
for (auto buffer : records.buffer)
if (buffer.line_number == line_number)
return buffer.items;
records.push( record( line_number, split( line ) ) );
return records.buffer[0].items;
};
// Search every line except the last two for s1
std::size_t num_lines = lines.size() - 2;
std::size_t line_number = -1;
for (const auto& line : lines)
if (++line_number >= num_lines) break;
else
{
// Should we ignore this line?
if (line.find( s1 ) == line.npos) continue;
// line (string) --> record (columns)
record::items_type r1 = get_record( line_number, line );
// For each matching column in the current record
for (std::size_t col = 0; col < r1.size(); col++)
if (r1[col] == s1)
{
// If the corresponding column in the second line matches s2
record::items_type r2 = get_record( line_number + 1, lines[ line_number + 1 ] );
if (r2[ col ] == s2)
{
record::items_type r3 = split( lines[ line_number + 2 ] );
return r3[ col ];
}
}
}
return "";
}
//----------------------------------------------------------------------------
/// Prints the elapsed time between two clock readings to std::cout as
/// "H:MM:SS.ffff" followed by a newline.
///
/// @param a  start reading
/// @param b  end reading
///
/// Both readings are in clock ticks (the difference is divided by
/// CLOCKS_PER_SEC), i.e. values as returned by std::clock(), even though the
/// parameters are declared as std::time_t. A negative interval prints as zero.
/// The stream's fill character, flags and precision are restored afterwards.
void seconds( std::time_t a, std::time_t b )
{
  double t = double( b - a ) / CLOCKS_PER_SEC;
  if (t < 0) t = 0;  // a negative value must not be converted to an unsigned type

  // Split into whole minutes + remaining seconds, then minutes into hours.
  const unsigned long long whole_minutes = static_cast <unsigned long long> ( t / 60 );
  const double             s = t - 60.0 * whole_minutes;
  const unsigned long long m = whole_minutes % 60;
  const unsigned long long h = whole_minutes / 60;

  const std::ios_base::fmtflags saved_flags     = std::cout.flags();
  const std::streamsize         saved_precision = std::cout.precision();
  const char                    saved_fill      = std::cout.fill( '0' );

  std::cout
    << h << ":"
    << std::setw( 2 ) << m << ":"
    << std::setw( 7 ) << std::fixed << std::setprecision( 4 ) << s  // "SS.ffff" is 7 characters
    << "\n";

  std::cout.fill( saved_fill );
  std::cout.precision( saved_precision );
  std::cout.flags( saved_flags );
}
//----------------------------------------------------------------------------
/// Usage: PROGRAM FILENAME NUMBER UNITS
///
/// Reads the last 500 lines of FILENAME, searches them with find_match() for
/// NUMBER / UNITS, prints the result (or "(not found)"), and then prints how
/// long reading and searching took.
///
/// @return 0 on success; 1 on a usage error or if FILENAME cannot be opened
int main( int argc, char** argv )
{
  if (argc != 4)
  {
    std::cerr << "usage: " << argv[0] << " FILENAME NUMBER UNITS\n";
    return 1;
  }

  const auto t1 = std::clock();

  // Report an unreadable file explicitly rather than letting it look like an
  // empty file that simply has no match.
  std::ifstream input( argv[1] );
  if (!input)
  {
    std::cerr << argv[0] << ": cannot open '" << argv[1] << "'\n";
    return 1;
  }

  using Lines = std::deque <std::string>;
  Lines lines = get_last_n_lines <Lines> ( std::move( input ), 500 );
  const auto t2 = std::clock();

  std::string s = find_match( argv[2], argv[3], lines );
  const auto t3 = std::clock();

  if (s.empty()) std::cout << "(not found)\n";
  else std::cout << s << "\n";

  std::cout << "read file: ";
  seconds( t1, t2 );
  std::cout << "search: ";
  seconds( t2, t3 );
}
/*
Example rows (presumably sample input: a NUMBER row, a UNITS row, then the value row):
123456 678942
kg/s Pa
26.87 6.58E6
*/