I am stuck on a homework assignment where I filter out bad data flags that say -9999 from a massive weather data file, and create a new file from that, I omitted the bad data records however, the next problem basically requires me to omit the columns of data I do not want and I am not sure how to omit those columns
Prompt:
1. (10 Points) Using the original AL Weather Station Data file find all records that have a bad data flag (-9999) for either the PRCP, TMAX or TMIN fields. Produce a new data file (call it Filtered_AL_Weather_Station.txt ) that omits those records with bad data flags. This new file will be used in problem 2. NOTE: The temperatures (TMAX, TMIN) are given in tenths of a degree Celsius. e.g 83 is 8.3 degrees C. (I have already done this program so just ignore it)
2. (15 Points) Using the filtered data file from problem 1, create another file (weather_station_five_column.txt) with only the following five columns of information: (This is the problem I am stuck on.)
STATION NAME DATE PRCP TMAX TMIN
Separate the date fields by inserting spaces and convert the temperatures from Celsius to Fahrenheit.
IMPORTANT: You will need to use the string conversion functions to convert the string type numbers to float or double. The functions are stof and stod. To convert a string to float do this:
string s_tmax; // string type for TMAX
float tmax; // float type for TMAX
// Convert string to float
tmax = stof(s_tmax);
Here is a sample of the output. (You may left justify station name if you like.)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
using namespace std;
int main(void)
{
// Variables
string dataline = "";
string station = "";
string tmax_s = "", tmin_s = "", prcp_s = "", date_s = "";
unsigned int pos_station_name = 0;
unsigned int pos_date = 0;
unsigned int pos_tmax = 0;
unsigned int pos_tmin = 0;
unsigned int pos_prcp = 0;
//unsigned int bad_records = 0;
float tmax = 0, tmin = 0, prcp = 0, date = 0;
ifstream infile;
ofstream outfile;
cout << "WEATHER STATION DATA" << endl << endl;
cout << "Open the data file." << endl << endl;
infile.open("/Users/adam/desktop/temp/Filtered_AL_Weather_Station.txt");
if (!infile)
{
cout << "Unable to open the input file. " << endl;
}
else
{
cout << "Data file opened." << endl;
}
outfile.open("/Users/adam/desktop/temp/weather_station_five_column.txt");
if (!outfile)
{
cout << "Unable to open the output file. " << endl;
}
else
{
cout << "Output file opened." << endl;
}
// Use headers to find max and min temp columns
getline(infile, dataline);
outfile << dataline << endl;
pos_station_name = dataline.find("STATION_NAME");
if (pos_station_name <= dataline.length())
{
cout << "STATION_NAME begins at column: " << pos_station_name << endl;
}
else
{
cout << "STATION_NAME not found." << endl;
return 2;
}
pos_date = dataline.find("DATE");
if (pos_date <= dataline.length())
{
cout << "DATE begins at column: " << pos_date << endl;
}
else
{
cout << "DATE not found." << endl;
return 2;
}
pos_prcp = dataline.find("PRCP");
if (pos_prcp <= dataline.length())
{
cout << "PRCP begins at column: " << pos_prcp << endl;
}
else
{
cout << "PRCP not found." << endl;
return 2;
}
pos_tmax = dataline.find("TMAX");
if (pos_tmax <= dataline.length())
{
cout << "TMAX begins at column: " << pos_tmax << endl;
}
else
{
cout << "TMAX not found." << endl;
return 2;
}
pos_tmin = dataline.find("TMIN");
if (pos_tmin <= dataline.length())
{
cout << "TMIN begins at column: " << pos_tmin << endl;
}
else
{
cout << "TMIN not found." << endl;
return 2;
}
getline(infile, dataline);
cout << left << setw(10) <<"STATION\t\t" <<setw(10) << "DATE\t\t" << setw(10) <<"PRCP\t\t" << setw(10) << "TMAX\t\t" << setw(10) << "TMIN" << endl;
// Output the table to the file
outfile << left << setw(10) <<"STATION\t\t" <<setw(10) << "DATE\t\t" << setw(10) <<"PRCP\t\t" << setw(10) << "TMAX\t\t" << setw(10) << "TMIN" << endl;
while (!infile.eof())
{
getline(infile, dataline);
//extract tmax and tmin
cout << setw(10) << station << "\t\t" << date << "\t\t" << tmax << "\t\t" << tmin << endl;
outfile << setw(10) << station << "\t\t" << date << "\t\t" << tmax << "\t\t" << tmin << endl;
station = dataline.substr(pos_station_name, 5);
date_s = dataline.substr(pos_date, 5);
prcp_s = dataline.substr(pos_prcp, 5);
tmax_s = dataline.substr(pos_tmax, 5);
tmin_s = dataline.substr(pos_tmin, 5);
date = stof(date_s);
prcp = stof(prcp_s);
tmax = stof(tmax_s); // string to float
tmin = stof(tmin_s);
infile >> station >> prcp >> tmax >> tmin;
} // End While
// Close the files
infile.close();
outfile.close();
cout << "\n\n";
return 0;
}
|
My filtered weather data txt file output file I use for problem #2 I omitted the bad date flags -9999 for TMAX and TMIN:
STATION STATION_NAME DATE MDPR DAPR PRCP SNWD SNOW TMAX TMIN AWND
----------------- -------------------------------------------------- -------- -------- -------- -------- -------- -------- -------- -------- --------
GHCND:USC00010505 BANKHEAD LOCK AND DAM AL US 20180301 -9999 -9999 0.15 0.0 -9999 82 53 -9999
GHCND:USC00010505 BANKHEAD LOCK AND DAM AL US 20180302 -9999 -9999 0.45 0.0 -9999 83 44 -9999
GHCND:USC00010505 BANKHEAD LOCK AND DAM AL US 20180303 -9999 -9999 0.00 0.0 -9999 83 33 -9999
|
I can't include the full file because it has 4000+ of these entries but my goal is to omit MDPR, SNWD, SNOW, AWWND and just have STATION DATE PRCP TMAX TMIN
What the output is supposed to look like except lined up of course
STATION NAME DATE PCRP TMAX TMIN
BANKHEAD LOCK AND DAM 2018 03 01 0.15 46.76 41.54
BANKHEAD LOCK AND DAM 2018 03 02 0.45 46.94 39.92
BANKHEAD LOCK AND DAM 2018 03 03 0.00 46.94 37.94
BANKHEAD LOCK AND DAM 2018 03 04 0.00 46.76 39.94
|
My output:But my output file is:
STATION STATION_NAME DATE MDPR DAPR PRCP SNWD SNOW TMAX TMIN AWND
STATION DATE PRCP TMAX TMIN
0 0 0
GHCND:USC00010505 20180 82 53
20180 82 53
20180 82 53
20180 82 53
20180 82 53
20180 82 53
20180 82 53
20180 82 53
|