Okay, I have used what you gave me, and I know my code is a little sloppy with comments. I was just trying to get all my thoughts out.
I'm not really sure how to proceed. I have my data in the struct, and I can access it. I have to calculate the distance for each data set, find the nearest hit (closest distance of same class type) and the nearest miss (closest distance of opposite class type), then use the Relief algorithm to rank the features, which is: $W(F_1) = \frac{1}{N}\sum_{i=1}^{N}\left[-\operatorname{diff}(\text{point}_i, \text{near hit})^2 + \operatorname{diff}(\text{point}_i, \text{near miss})^2\right]$, where $N$ is the number of samples and the running total starts at 0. Then I will do the same with F2 and output the features in order of highest score to lowest score.
I'm telling you all this, so you can see where I'm going. What is the best way to proceed? My initial idea to store the differences in an array isn't going to work because it's impossible to tell which points the data originated from. The function doesn't work right, anyways. I just didn't want to invest more time in it, if it's not going to do what I need it to do.
I thought to make another struct named distance to hold the distance info, but I can't seem to figure out how to pack all the data I need into the rest of this program. Help, please! I just need ideas of how to pack all this into a neat little package. It seems so messy and all over the place to me.
Here is what I have done so far: As I said above, the distance function is wrong, and I know it. Just left it as is so you could see what I'm working with.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <cmath>
using namespace std;
// ---- Program-wide state shared by main() and the helper functions ----
std::string file_name;     // name of the data file, entered by the user in main()
std::string current_line;  // scratch buffer holding the line currently being parsed
int m;                     // number of samples, entered by the user in main()
int n;                     // number of features, entered by the user in main()
double F1, F2;             // scratch values for the two features parsed from a line
char class_type;           // scratch value for the class label parsed from a line
// Default-constructed on purpose: file_name is still empty during static
// initialization, so constructing the stream from it would attempt to open ""
// and leave the stream with failbit set before main() even starts.
// main() opens the file once the user has supplied its name.
std::ifstream input;
std::ofstream output;      // opened as "output.txt" in main()
// One labelled observation from the data set: two numeric features plus the
// class label it belongs to.
// Every member has a default initializer so that a sample created anywhere
// (including as a local variable) starts in a known state instead of holding
// indeterminate values.
struct sample
{
    double F1 = 0.0;         // first feature value
    double F2 = 0.0;         // second feature value
    char class_type = '\0';  // class label; '\0' means "not set yet"
};
// Sample storage. The capacity is fixed at 6 even though main() reads the
// sample count m from the user, so m must not exceed 6.
sample p[6]; //array of 6 samples
sample data;//single sample object; not referenced anywhere in the visible code
// Distance buffer handed to distance(); its capacity is fixed at 100 doubles.
double q[100]; //array to hold distances
// Forward declarations so main() can call the helpers that are defined after it.
void print(sample Array[], ofstream& output);
void clean_data(string &str);
void distance(sample p[], double q[], ofstream& output);
/*
 * Program entry point.
 *
 * Prompts for the sample count, the feature count and the data file name,
 * loads up to m comma-separated "F1,F2,class" rows into the global array p,
 * then writes the loaded table and the distance report to "output.txt".
 *
 * Returns 0 on success and 1 when the user input is invalid or a file cannot
 * be opened.
 */
int main()
{
    // p is a fixed-size global array; never accept more samples than it holds.
    const int capacity = static_cast<int>(sizeof(p) / sizeof(p[0]));

    std::cout << "Enter the number of samples in the data set:\n";
    if (!(std::cin >> m) || m < 1 || m > capacity)
    {
        std::cerr << "The number of samples must be between 1 and " << capacity << ".\n";
        return 1;
    }

    std::cout << "Enter the number of features in the data set:\n";
    if (!(std::cin >> n))
    {
        std::cerr << "The number of features must be a whole number.\n";
        return 1;
    }

    std::cout << "Enter the name of the data file (append\".txt\"):\n";
    std::cin >> file_name;

    // Drop any stale error state first so that a successful open() yields a
    // usable stream even on libraries where open() does not reset the flags.
    input.clear();
    input.open(file_name.c_str());
    if (!input.is_open())
    {
        std::cerr << "Could not open data file \"" << file_name << "\".\n";
        return 1;
    }

    output.open("output.txt");
    if (!output.is_open())
    {
        std::cerr << "Could not open \"output.txt\" for writing.\n";
        return 1;
    }

    // Drive the loop with getline() itself rather than eof(): eof() only turns
    // true after a read has already failed, which would store a stale row.
    int loaded = 0;
    while (loaded < m && std::getline(input, current_line))
    {
        clean_data(current_line);
        std::istringstream ss(current_line);
        if (!(ss >> F1 >> F2 >> class_type))
            continue; // blank or malformed line: skip it instead of storing junk

        p[loaded].F1 = F1; // copy the parsed values into the array of structs
        p[loaded].F2 = F2;
        p[loaded].class_type = class_type;
        ++loaded;
    }

    if (loaded < m)
    {
        std::cerr << "Warning: expected " << m << " samples but only read "
                  << loaded << ".\n";
        m = loaded; // print() and distance() must only look at rows that exist
    }

    print(p,output);
    distance(p,q,output);
    output.close();
    input.close();
    system("pause");
    return 0;
}
//output to text file
void print(sample Array[], ofstream& output)
{
for ( int i=0; i<m; i++ )
output << setw(15) << Array[i].F1 <<setw(15)
<< Array[i].F2 << setw(40)
<< Array[i].class_type << endl;
}
// Turns every comma in str into a single space, in place, so that a
// comma-separated record can be read with whitespace-delimited extraction.
void clean_data(std::string &str)
{
    for (char& ch : str)
    {
        if (ch == ',')
            ch = ' ';
    }
}
void distance(sample p[], double q[], ofstream& output)
{
double distance;
for (int i = 0; i < m; i++)
{
for (int j = 1; j < m; j++)
distance = sqrt(((p[i].F1 - p[j].F1)*(p[i].F1 - p[j].F1))-((p[i].F2 - p[j].F2)*(p[i].F2 - p[j].F2)));
q[i] = distance;
output << q[i] << endl;
}
}
//function to find near miss and near hit and store in array for each data point
//check class type
//for same class, return smallest difference as near hit
//for diff class, return smallest difference as near miss
//store each value in an array
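//function to calculate relief algorithm
//FOR EACH FEATURE
//square the near hit difference and make it a negative value
//square the near miss difference and add it to the negated near hit term
//add the result to the running total for the next instance, then divide the total by the number of samples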
//
|