You can get an XML parsing library.
If you need something really simple, follow this pseudocode:
 1  find '<'
 2  read until '>'
 3  if what you read == "name"
 4      read until '<'
 5      read until '>'
 6      if what you read == '/'+"name" // assume "name" to be an object of std::string
 7          you got your text
 8      else
 9          repeat from line 4
10  else
11      repeat from line 1
Try this code. You first have to open an ifstream in binary mode (not text mode). You then call
findBlock(stream, yourStartTag, yourEndTag, startingOffsets, nextOffsets); Make sure that startingOffsets is initialized to (0,0). After this call, nextOffsets should contain the starting and ending offsets of your data. Next, call readLine. If the call is successful, the returned buffer will be non-empty and contain your text, including EOL characters.
/// Consumes the run of white-space characters found at the current read
/// position of `stream` and reports how many were discarded.
///
/// A character counts as white space when it occurs in the set returned by
/// g_WhiteSpace() (presumably a std::string of separator characters — confirm
/// against its definition). The first character that is not in the set is
/// left in the stream.
///
/// @param stream  input stream to advance
/// @return number of characters removed from the stream
inline long skipWhiteSpace(tINPStream& stream)
{
    typedef tINPStream::traits_type traits;

    long skipped = 0;
    while (stream.good())
    {
        // peek() yields int_type so that end-of-file can be told apart from
        // every real character; test for it before narrowing to char_type.
        const tINPStream::int_type next = stream.peek();
        if (traits::eq_int_type(next, traits::eof()))
            break;

        const tINPStream::char_type ch = traits::to_char_type(next);
        if (g_WhiteSpace().find_first_of(ch) == std::string::npos)
            break; // not white space: leave it for the caller

        stream.ignore();
        ++skipped;
    }
    return skipped;
}
currentChar = stream.peek();
if (ch == currentChar) //is this the char we're looking for?
{
retVal.first = stream.tellg();
break;
}
else
stream.ignore();
}
return retVal;
}
inline locateToken(tINPStream& stream, const std::string& tag, const tDataBlockOffsets& offset, const WhiteSpace& whiteSpace=g_WhiteSpace)
{
tINPStream::pos_type retVal(-1);
tCharContainer* p = new tCharContainer; //create a new container
p->resize(tag.size()); //reserve enough space to read the tag
tCharBufferRef iobuffer(p); //wrap it in a shared_ptr
stream.clear(); //clear all bad flags
stream.seekg(offset.first, std::ios_base::beg); //seek to starting search location
tDataBlockOffsets nextOffset(offset);
while(!stream.eof() && nextOffset.first < nextOffset.second)
{
charValue = locateCharacter(stream, tag[0], nextOffset);
if (charValue.first != -1) //is this the char we're looking for?
{
tSTDStringContainer tokens;
tINPStream::pos_type bytesRead = readDataUntilEOL(stream, *iobuffer, tokens, whiteSpace);
if (!tokens.empty() && tokens[0] == tag) //is this the token we're looking for?
{
retVal = stream.tellg() - bytesRead; //return position of the first character of the sought string
return retVal; //return the file offset immediatly past the sought string
}
else
{
for (size_t i=0; i<bytesRead; ++i)
stream.unget();
stream.ignore(); //discard the character that triggered this read
nextOffset.first = stream.tellg();
}
}
}
return retVal;
}
/// Locates the data enclosed between `tagStart` and `tagEnd`.
///
/// The start tag is searched for from startingOffsets.first; the end tag is
/// then searched for from the first byte after the start tag. On success
/// nextOffsets.first is the offset of the first non-white-space character
/// after the start tag and nextOffsets.second is the offset of the first
/// character of the end tag.
///
/// On failure nextOffsets is left partially updated (the member belonging to
/// the tag that was not found holds -1).
///
/// @param stream           input stream to search; its error flags are
///                         cleared and its read position is moved
/// @param tagStart         token that opens the block
/// @param tagEnd           token that closes the block
/// @param startingOffsets  range of file offsets to search
/// @param nextOffsets      receives the offsets of the enclosed data
/// @param whiteSpace       separator set forwarded to locateToken()
/// @return true when both tags were found, false otherwise
inline
bool findBlock(tINPStream& stream, const std::string& tagStart, const std::string& tagEnd, const tDataBlockOffsets& startingOffsets, tDataBlockOffsets& nextOffsets, const WhiteSpace& whiteSpace=g_WhiteSpace)
{
    tDataBlockOffsets nextLocation(startingOffsets);
    stream.clear(); //clear all error flags
    stream.seekg(startingOffsets.first); //starting point of the search

    nextOffsets.first = locateToken(stream, tagStart, nextLocation, whiteSpace); //offset to the beginning of the start tag
    if (nextOffsets.first == -1)
        return false;

    // Only the start tag has to be skipped to reach the data; the length of
    // the end tag does not contribute to either offset.
    const std::streamoff afterStartTag =
        static_cast<std::streamoff>(nextOffsets.first) + static_cast<std::streamoff>(tagStart.size());

    nextLocation.first = afterStartTag; //update search start
    nextOffsets.second = locateToken(stream, tagEnd, nextLocation, whiteSpace); //offset to the beginning of the end tag
    if (nextOffsets.second == -1)
        return false;

    // The search for the end tag moved the stream, so go back to the byte
    // after the start tag before counting the white space that follows it.
    stream.clear();
    stream.seekg(afterStartTag, std::ios_base::beg);
    nextOffsets.first = afterStartTag + skipWhiteSpace(stream); //skip eol characters if any
    return true;
}
inline tCharBufferRef readLine(tINPStream& stream, const tDataBlockOffsets& offsets)
{
tCharContainer* p = new tCharContainer; //create a new container
p->resize(1); //reserve space to initiate a read
tCharBufferRef iobuffer(p); //wrap it in a shared_ptr
stream.seekg(offsets.first); //seek to the desired file offset
tINPStream::pos_type filePosition = stream.tellg(); //current file position
tINPStream::char_type ch;
while (!stream.eof() && filePosition < offsets.second) //do not read beyond the specified range or eof.
{
if (skipEOL(stream)) //is it an eol?
return iobuffer; //found eol. Return.
Thanks for the replies, guys... I have done it using the STL and basic file operations. I didn't want to make it too complex, and in my project I cannot use third-party libraries like Boost.