1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
|
// Record the start offset (within web_all) of each search-result container.
// For i = 0 .. resNum-1 the pattern looks for a result <div> carrying
// id="<i+1>"; the scan stops at the first id that cannot be found.
for( unsigned i = 0; i != resNum; ++i ) {
    // Render the 1-based result number as text so it can be spliced into the pattern.
    stringstream temp;
    temp << i+1;
    const string idx = temp.str();

    string pstr("<div class=\"result(-op)?\\s\\bc-container.+\\bid=\""+idx+"\"");
    cout << pstr << endl;
    regex pattern2find(pstr, regex::ECMAScript | regex::icase);
    smatch matchRes;

    // Branch on regex_search's return value rather than matchRes.empty():
    // Boost.Regex leaves the match object in an undefined state when the
    // search fails, so empty() is not a reliable "not found" indicator.
    if( !regex_search(web_all, matchRes, pattern2find, regex_constants::match_default) ){
        break;
    }

    startPos.push_back(matchRes.position());
    cout << matchRes.str() << endl;
}
//get the title and abstract of the search results
for( unsigned i = 0; i != startPos.size(); ++i ){
string pstr_t("<h3 class=\"t\">[^\\w]*<a[^\\}]*[^\\w]*href[^>]*>.*</a>");
string pstr_a("<div class=\"c-abstract[^>]*>.*</div>");
cout << pstr_t << endl << pstr_a << endl;
regex titleP2F(pstr_t, regex::ECMAScript | regex::icase);
regex abstP2F(pstr_a, regex::ECMAScript | regex::icase);
boost::smatch titleMRes;
boost::smatch abstMRes;
if(i != startPos.size()-1){
regex_search<string::const_iterator>(web_all.begin()+startPos[i], web_all.begin()+startPos[i+1], titleMRes, titleP2F, regex_constants::match_default);
regex_search<string::const_iterator>(web_all.begin()+startPos[i], web_all.begin()+startPos[i+1], abstMRes, abstP2F, regex_constants::match_default);
}else{
regex_search<string::const_iterator>(web_all.begin()+startPos[i], web_all.end(), titleMRes, titleP2F, regex_constants::match_default);
regex_search<string::const_iterator>(web_all.begin()+startPos[i], web_all.end(), abstMRes, abstP2F, regex_constants::match_default);
}
if( !titleMRes.empty() ){
cout << titleMRes.str() << endl; // error here
}
if( !abstMRes.empty() ){
cout << abstMRes.str() << endl;
}
}
|