Special characters taking up more than one slot in char

I'm trying to have this program parse a file containing special characters from the ipa. But the output it gives has both regular characters, correct octal codes, and incorrect octal codes. For example, there is:

1. #p\311,#f\311
2. p\311\231,f\311\231
3. ra#,la#

where the third line is correct, but the first and second are incorrect. \311 represents half of the octal code for a 'ə,' which has true octal code \311\231 (as in number 2). But number two doesn't have three characters in it as it is supposed to because it seems like c++ is treating the octal code \311\231 as two characters rather than one representing a 'ə.' And just for reference, the input file looks like this:

kəna kəna
niŋ mi
(e)wei wæi
emə əmə

Any ideas about how I can fix this? I've put my code below.

My code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#include "sysCorr.h"
#include <algorithm> 

using namespace std;

//takes a letter, a number of languages, deleteParentheses/keepParentheses, a read file, and a write file
int main(int argc, char** argv){
  if (argc != 6){
    throw runtime_error("Incorrect number of arguments. Please give a letter, a number of languages, 'deleteParentheses'/'keepParentheses',a read file, and a write file");
  }
  char letter = *argv[1];
  int numLang = (int)(argv[2][0] - '0');//convert char to int
  string parenString = argv[3];//whether or not to delete parentheses
  string readName = argv[4];//saves command line read file
  string writeName = argv[5];
  ofstream fileToWrite;

  vector<vector<string> > wordList;
  wordList = readFile(readName,numLang,wordList,parenString);

  vector<vector<string> > parsedList;
  parsedList = parseList(wordList,numLang);
  //letterList
  fileToWrite.open(writeName.c_str()); 
  for (int i=0;i<parsedList[0].size();i++){
    fileToWrite<<parsedList[0][i]<<",";
    fileToWrite<<parsedList[1][i]<<endl;
  } 
  return 1;
}

vector<vector<string> > readFile(string readName, int numLang, vector<vector<string> > wordList,string parenString){
  ifstream file;
  file.open(readName.c_str());
  if (!file.good()) {
    throw runtime_error("error opening file");
  }
  vector<string> cognates;
  string word;
  //cout<<numLang<<endl;
  while (!file.eof()){
    for (int i=0; i<numLang; i++){
      file >> word;
      word = editCharacters(word,parenString);
      cognates.push_back(word);
    }
    if (!file.eof()){//get rid of duplicate word at end
      wordList.push_back(cognates);
    }
    cognates.clear();
  }
  return wordList;
}

string editCharacters(string word,string parenString){
  if (word[word.size()-1] != '-'){
    word.insert(word.size(), 1, '#');
  }
  if (word[0] != '-'){
    word.insert(0, 1, '#');
  }
  //remove parentheses if asked
  size_t openParenLoc = word.find('(');
  size_t closeParenLoc = word.find(')');
  if ((openParenLoc!=string::npos) && (closeParenLoc!=string::npos)){
    if (parenString == "deleteParentheses"){
      word.erase(openParenLoc,closeParenLoc-openParenLoc+1);
    } else if (parenString == "keepParentheses"){
      string pos1 = word.substr(0,openParenLoc);
      string pos2 = word.substr(openParenLoc+1,closeParenLoc-openParenLoc-1);
      string pos3 = word.substr(closeParenLoc+1,word.size()-closeParenLoc+1);
      word = pos1+pos2+pos3;
    } else {
      throw runtime_error("choose 'deleteParentheses' or 'keepParentheses'");
    }
  }
  return word;
}

vector<vector<string> > parseList(vector<vector<string> > wordList, int numLang){
  vector<vector<string> > words;
  vector<string> languageVect;
  int a=0;
  while (a<numLang){
    words.push_back(vector<string>());
    a++;
  }
  for (int i=0;i<wordList.size();i++){//each word in all languages
    int longestLangIndex = longestIndex(wordList[i]);
    cout<<"size: "<<wordList[i].size()<<endl;
    for (int j=0;j<wordList[i].size();j++){
      permutations(wordList[i][j], words,longestLangIndex, j);
      if (j!=longestLangIndex){
        int suppLength = wordList[i][longestLangIndex].size()-wordList[i][j].size();
        for (int k = 0;k<suppLength;k++){
          words[j].push_back("");
        }
      }
    }
  }
  //where first [] is language and second [] is permutation number
  for (int i=0;i<words[1].size();i++){
    cout<<words[1][i]<<endl;
  }
  return words;
}

void permutations(string word, vector<vector<string> >& words, int longestLangIndex, int j){
  string perm;
  for (int a=1;a<word.length()-1;a++){//go from 2nd to 2nd to last letter for the language
    perm = "";
    perm+=word[a-1];
    perm+=word[a];
    perm+=word[a+1];
    //cout<<perm<<endl;
    words[j].push_back(perm);

    //std::vector<string>::iterator it = find(permList.begin(),permList.end(), perm);
    //if (it != permList.end()){
  }
}

int longestIndex(vector<string> wordLang){
  int gIndex = 0;
  for (int i=0;i<wordLang.size();i++){//size is 2 right now
    if (wordLang[i]>wordLang[gIndex]){
      gIndex = i;
    }
  }
  return gIndex;
}



--
Thank you!
Last edited on
Can you post your original assignment?
Topic archived. No new replies allowed.