How to split a string into different parts at each period

Hello. So, suppose I had a string, "Hello.World" and I wanted
to get from that, "Hello" and "World". How can I do this easily?
I'd consider using the C library string function strtok to parse the string into tokens.
https://en.cppreference.com/w/c/string/byte/strtok
Last edited on
It's not really a Windows question. Just a C++ question.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <iostream>
#include <string>
#include <vector>

// Splits `str` into the pieces that lie between occurrences of `delim`.
//
// Returns a std::vector<std::string> of the pieces, in order.
//  - Empty pieces between adjacent delimiters (and before a leading
//    delimiter) are kept.
//  - A delimiter at the very end of `str` does not produce a trailing
//    empty piece, and an empty `str` yields an empty vector.
//  - An empty `delim` cannot split anything: the whole of `str` is returned
//    as a single piece (nothing if `str` is empty). Without this guard,
//    find() would keep matching at the same position and the loop would
//    never terminate.
auto split(const std::string& str, const std::string& delim)
{
    std::vector<std::string> v;
    if (delim.empty())
    {
        if (!str.empty()) v.push_back(str);
        return v;
    }
    for (std::size_t last_pos = 0; last_pos < str.size(); )
    {
        std::size_t pos = str.find(delim, last_pos);
        if (pos == std::string::npos) pos = str.size();   // no more delimiters: take the rest
        v.push_back(str.substr(last_pos, pos - last_pos));
        last_pos = pos + delim.size();                    // resume just past the delimiter
    }
    return v;
}

// Demo: split a dotted string on "." and print each piece on its own line.
int main()
{
    const std::string text {"one.two.three.four.five"};
    const auto pieces = split(text, ".");
    for (const std::string& piece : pieces)
    {
        std::cout << piece << '\n';
    }
}

Nice C++ solution there, dutch, using std::string and C++ functions.
If the delim is a single character, then consider:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <iostream>
#include <string>
#include <vector>
#include <sstream>

// Splits `str` at every occurrence of the single character `delim` by
// reading it through a string stream with std::getline.
//
// Returns a std::vector<std::string> of the pieces, in order. Empty pieces
// between adjacent delimiters are kept; a delimiter at the very end of the
// input does not add a trailing empty piece, and an empty input yields an
// empty vector.
auto split(const std::string& str, char delim)
{
	std::vector<std::string> pieces;
	std::istringstream input(str);
	std::string piece;

	while (std::getline(input, piece, delim))
	{
		pieces.push_back(piece);
	}

	return pieces;
}

// Demo: split a dotted string on '.' and print each piece on its own line.
int main()
{
	const std::string text {"one.two.three.four.five"};
	const auto pieces = split(text, '.');

	for (std::size_t i = 0; i < pieces.size(); ++i)
		std::cout << pieces[i] << '\n';
}

Last edited on
It's not really a Windows question. Just a C++ question.


Sorry, that was a mistake.
Thanks! :D
It can be done without any of the C++-specific parts of the C++ Standard Library....

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// cl C_Parse.cpp /O1 /Os /MT /EHsc /D "_CRT_SECURE_NO_WARNINGS"
#include <cstdlib>
#include <cstdio>      // 122,368 Bytes With Microsoft Libraries
#include <string>      //   4,608 Bytes With TCLib.lib


// Returns the number of fields in the null-terminated string pString when
// it is split on cDelimiter, i.e. one more than the number of times
// cDelimiter occurs. A string containing no delimiter (including the empty
// string) therefore counts as one field.
size_t iParseCount(const wchar_t* pString, wchar_t cDelimiter)
{
 int iFields=1;       // a string with no delimiter still holds one field

 for(const wchar_t* pCur=pString; *pCur; ++pCur)
 {
    if(*pCur==cDelimiter)
       ++iFields;
 }

 return iFields;
}


// Splits pDelimitedString on cDelimiter and stores a freshly malloc'ed,
// null-terminated copy of each field in pStrings[0 .. iParseCount-1].
//
//  pStrings          caller-supplied array with room for iParseCount pointers
//  pDelimitedString  null-terminated input; it is not modified
//  cDelimiter        character that separates fields
//  iParseCount       number of slots to fill (normally the field count of
//                    the input)
//
// Fields are copied verbatim, including any surrounding white space. If
// iParseCount is larger than the number of fields actually present, the
// extra slots receive empty strings rather than reading past the end of the
// input. If an allocation fails the corresponding slot is set to NULL. The
// caller owns every returned string and must free() each one.
//
// Each field is measured in place and copied straight out of the input, so
// no intermediate scratch buffer is needed, and the allocation size uses
// sizeof(wchar_t) instead of assuming two-byte characters.
void Parse(wchar_t** pStrings, const wchar_t* pDelimitedString, wchar_t cDelimiter, int iParseCount)
{
 if(!pStrings || !pDelimitedString)
    return;

 const wchar_t* pStart=pDelimitedString;
 for(int i=0; i<iParseCount; i++)
 {
    // Find the end of the current field: next delimiter or end of input.
    const wchar_t* pEnd=pStart;
    while(*pEnd && *pEnd!=cDelimiter)
       pEnd++;

    const size_t iLen=static_cast<size_t>(pEnd-pStart);
    pStrings[i]=static_cast<wchar_t*>(malloc((iLen+1)*sizeof(wchar_t)));
    if(pStrings[i])
    {
       for(size_t j=0; j<iLen; j++)
           pStrings[i][j]=pStart[j];
       pStrings[i][iLen]=0;
    }

    // Step over exactly one delimiter; stay on the terminator once the
    // input is exhausted so any remaining slots become empty strings.
    pStart = *pEnd ? pEnd+1 : pEnd;
 }
}


// Removes leading spaces and tabs from the null-terminated string pStr by
// shifting the remaining characters (terminator included) to the front of
// the same buffer. Other white space, such as newlines, is left alone.
void LTrim(wchar_t* pStr)
{
 const wchar_t* pSrc=pStr;

 while(*pSrc==L' ' || *pSrc==L'\t')
    pSrc++;

 if(pSrc==pStr)                 // nothing to strip
    return;

 wchar_t* pDst=pStr;
 while((*pDst++=*pSrc++)!=0)    // copy up to and including the terminator
 {
 }
}


// Demo driver. Counts the '.'-delimited fields in a sample string,
// allocates a wchar_t** array with one pointer slot per field, lets Parse()
// fill it with individually allocated field strings, then prints each field
// left-trimmed together with its length. All memory is unwound here: first
// the individual strings, then the pointer array itself. Waits for a key
// press before exiting.
int main()
{
 wchar_t szString[]=L"Zero . One . Two . Three . Four . Five";

 const size_t iStrings=iParseCount(szString, L'.');
 wchar_t** pStrs=(wchar_t**)malloc(iStrings*sizeof(wchar_t*));
 if(pStrs)
 {
    wchar_t** const pEnd=pStrs+iStrings;

    Parse(pStrs,szString,L'.',iStrings);

    // Display the trimmed strings.
    for(wchar_t** pp=pStrs; pp!=pEnd; ++pp)
    {
        LTrim(*pp);
        wprintf(L"%s\t%Iu\n",*pp,wcslen(*pp));
    }

    // Release each field string, then the pointer array.
    for(wchar_t** pp=pStrs; pp!=pEnd; ++pp)
        free(*pp);
    free(pStrs);
 }
 getchar();

 return 0;
}

Not that I have anything against C++ classes. Actually, I had that code lying around, and I don't even know why I wrote it, because I use a String::Parse() method of my String class to do that kind of work, and I created the above code as a non-class-based adaptation of my String class. I might as well post that too. There's the main program, then a super-duper slimmed-down version of the String class, reduced by a factor of five or six......

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// cl MinParse.cpp MinStrings.cpp /O1 /Os /MT /EHsc /W2
// cl MinParse.cpp MinStrings.cpp /O1 /Os /GS- /link TCLib.lib kernel32.lib User32.lib    
//#define TCLib
#ifndef UNICODE
   #define UNICODE
#endif
#ifndef _UNICODE   
   #define _UNICODE
#endif   
#include <windows.h>  
#ifdef TCLib              //    4,608 bytes  TCLib Linkage
   #include "stdio.h"    
   #include "tchar.h"    
#else                     //  139,776 bytes  MS C Runtime Linkage
   #include <cstdio>    
   #include <tchar.h>    
#endif 
#include "MinStrings.h"

int _tmain()
{
 String s1=_T("Zero .  One  . Two . Three . Four . Five  ");
 s1.Print(_T("s1 = "),true);
 int iParseCount=s1.ParseCount(_T('.'));
 _tprintf(_T("iParseCount = %d\n\n"),iParseCount);
 String* pStrs=new String[iParseCount];
 s1.Parse(pStrs, _T('.'), iParseCount);
 for(int i=0; i<iParseCount; i++)
 {
     pStrs[i].Trim();
     pStrs[i].Print(true);
 }
 delete [] pStrs;
 getchar();
 
 return 0;
}


s1 = Zero . One . Two . Three . Four . Five
iParseCount = 6

Zero
One
Two
Three
Four
Five

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// MinStrings.h
#ifndef Strings_h
#define Strings_h

#ifndef ssize_t
typedef SSIZE_T ssize_t;             // ssize_t is defined in GCC, but isn't defined in VC9-15, but rather SSIZE_T.  For symetry we'll define it.
#endif

#define MINIMUM_ALLOCATION     16    // allocate at least this for every String
#define EXPANSION_FACTOR        2    // repeated concatenations will keep doubling buffer
#define NUMERIC_CONVERSIONS          // allows direct assignment of numeric values to Strings, e.g., String s(3.14159) or s=3.14159;
#define FORMAT_MONEY                 // put commas every three places, rounding, left/right justification, specify field sizes (padding), etc.
#define CONSOLE_OUTPUT               // support console output, i.e., enable String::Print()
#define                       x64
// #define                    debug

class String
{
 public:                                           // Constructors (8)
 String();                                         // Uninitialized Constructor~String();
 String(const TCHAR* pStr);                        // Constructor: Initializes with char*, e.g. s1 = "PowerBASIC! Compile Without Compromise!"
 String& operator=(const TCHAR* pStr);             // Assign a character string to a String Object, e.g.,  String s1 = "Compile Without Compromise";
 #ifdef CONSOLE_OUTPUT
    void Print(bool blnCrLf);                      // Outputs String with or without CrLf
    void Print(const TCHAR* pText, bool blnCrLf);  // Parameter #1 - leading text literal; Parameter #2 - with/without CrLf
 #endif
 int ParseCount(const TCHAR delimiter);                         // Returns count of delimited fields as specified by char delimiter, i.e., comma, space, tab, etc.
 void Parse(String* pStr, TCHAR delimiter, size_t iParseCount); // Parses this based on delimiter.  Must pass in 1st parameter String* to sufficient number of Strings
 void LTrim();                                                  // Removes leading white space by modifying existing String
 void RTrim();                                                  // Removes trailing white space by modifying existing String
 void Trim();                                                   // Removes leading or trailing white space from existing String
 TCHAR* lpStr();                                                // Same as std::string.c_str().  Returns pointer to underlying Z String
 ~String();                                                     // String Destructor

 private:
 TCHAR*    lpBuffer;
 size_t    iLen;
 size_t    iCapacity;
};
 
#endif
// End Strings.h 


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// MinStrings.cpp
//#define TCLib
#define UNICODE
#define _UNICODE
#include  <windows.h>
#ifdef TCLib
   #include  "string.h"
   #include  "stdio.h"
   #include  "tchar.h"
   #define   NEW new
#else
   #include  <string>
   #include  <cstdio>
   #include  <tchar.h>
   #define   NEW new(std::nothrow)
#endif 
#include  "MinStrings.h"


// Default constructor: allocates a 16-element buffer holding the empty
// string, i.e. room for 15 characters plus the terminating null.
String::String() : lpBuffer(new TCHAR[16]), iLen(0), iCapacity(15)
{
 lpBuffer[0]=0;
}


// Constructor: initializes the String with a copy of the null-terminated
// text pStr. The buffer size is the string length rounded up to the next
// multiple of MINIMUM_ALLOCATION, which always leaves room for the
// terminator. If the allocation yields a null pointer the object is left
// with no buffer and zero length/capacity.
String::String(const TCHAR* pStr) : lpBuffer(NULL), iLen(0), iCapacity(0)
{
 const size_t nChars = _tcslen(pStr);
 const size_t nAlloc = MINIMUM_ALLOCATION*(nChars/MINIMUM_ALLOCATION+1);

 lpBuffer=NEW TCHAR[nAlloc];
 if(!lpBuffer)
    return;

 _tcscpy(lpBuffer,pStr);
 iLen      = nChars;
 iCapacity = nAlloc-1;
}


// Assigns the null-terminated text pStr to this String and returns *this.
//
// The existing buffer is reused when it is large enough. Otherwise a new
// buffer is allocated (sized with EXPANSION_FACTOR head room, rounded up to
// a multiple of 16), filled, and only then swapped in: allocating and
// copying before releasing the old buffer means a failed allocation leaves
// the object untouched instead of holding a dangling pointer. A String
// whose buffer is null (a constructor allocation that failed) is also given
// a fresh buffer rather than being written through. Assigning a String's
// own buffer to itself is a no-op.
String& String::operator=(const TCHAR* pStr)  // Assign TCHAR* to String
{
 size_t iNewLen=_tcslen(pStr);
 if(!this->lpBuffer || iNewLen>this->iCapacity)
 {
    size_t iNewSize=(iNewLen*EXPANSION_FACTOR/16+1)*16;
    TCHAR* pNew=new TCHAR[iNewSize];
    _tcscpy(pNew,pStr);
    delete [] this->lpBuffer;
    this->lpBuffer=pNew;
    this->iCapacity=iNewSize-1;
 }
 else if(pStr!=this->lpBuffer)                // copying a buffer onto itself is undefined for _tcscpy
    _tcscpy(this->lpBuffer,pStr);
 this->iLen=iNewLen;

 return *this;
}


int String::ParseCount(const TCHAR delimiter)   //returns one more than # of
{                                               //delimiters so it accurately
 int iCtr=0;                                    //reflects # of strings delimited
 TCHAR* p;                                      //by delimiter.

 p=this->lpBuffer;
 while(*p)
 {
   if(*p==delimiter)
      iCtr++;
   p++;
 }

 return ++iCtr;
}


// Splits this String on delimiter and assigns the fields, in order, to
// pStr[0 .. iParseCount-1].
//
//  pStr         caller-supplied array of at least iParseCount Strings
//  delimiter    character that separates fields
//  iParseCount  number of array elements to fill (normally ParseCount())
//
// The text is copied into a scratch buffer with every delimiter replaced
// by a null, which turns it into a run of consecutive null-terminated
// strings; each one is then assigned to the next array element. If
// iParseCount exceeds the number of fields actually present, the extra
// elements receive empty strings instead of reading past the end of the
// scratch buffer.
void String::Parse(String* pStr, TCHAR delimiter, size_t iParseCount)
{
 TCHAR* pBuffer=new TCHAR[this->iLen+1];
 if(pBuffer)
 {
    TCHAR* p=pBuffer;
    TCHAR* c=this->lpBuffer;
    while(*c)
    {
       if(*c==delimiter)
          *p=0;
       else
          *p=*c;
       p++, c++;
    }
    *p=0;
    TCHAR* pLast=p;                  // position of the final terminator
    p=pBuffer;
    for(size_t i=0; i<iParseCount; i++)
    {
        pStr[i]=p;
        p=p+pStr[i].iLen+1;          // step over the field and its terminator
        if(p>pLast)
           p=pLast;                  // input exhausted: remaining fields are empty
    }
    delete [] pBuffer;
 }
}


void String::LTrim()
{
 size_t iCt=0;

 for(size_t i=0; i<this->iLen; i++)
 {
     if(this->lpBuffer[i]==9||this->lpBuffer[i]==10||this->lpBuffer[i]==13||this->lpBuffer[i]==32)
        iCt++;
     else
        break;
 }
 if(iCt)
 {
    for(size_t i=iCt; i<=this->iLen; i++)
        this->lpBuffer[i-iCt]=this->lpBuffer[i];
 }
 this->iLen=this->iLen-iCt;
}


void String::RTrim()
{
 int iCt=0;

 for(int i=this->iLen-1; i>0; i--)
 {
     if(this->lpBuffer[i]==9||this->lpBuffer[i]==10||this->lpBuffer[i]==13||this->lpBuffer[i]==32)
        iCt++;
     else
        break;
 }
 this->lpBuffer[this->iLen-iCt]=0;
 this->iLen=this->iLen-iCt;
}


void String::Trim()
{
 this->LTrim();
 this->RTrim();
}


// Returns a pointer to the String's own buffer (not a copy).
TCHAR* String::lpStr()
{
 return lpBuffer;
}


#ifdef CONSOLE_OUTPUT
   void String::Print(bool blnCrLf)
   {
    _tprintf(_T("%s"),this->lpBuffer);
    if(blnCrLf)
       _tprintf(_T("\n"));
   }

   
   void String::Print(const TCHAR* pStr, bool blnCrLf)
   {
    _tprintf(_T("%s%s"),pStr,lpBuffer);
    if(blnCrLf)
       _tprintf(_T("\n"));
   }
#endif


// Destructor: releases the character buffer.
String::~String()
{
 delete [] lpBuffer;
}


You would need to build that at a warning level no higher than /W2, or you'll get warnings about insecure CRT functions.
Topic archived. No new replies allowed.