Ok, making some progress with this but stuck again. My "common" output is working OK, but to eliminate inclusion of these lines in the second ouput file, I added another array item called "marker" but these don't seem to be working properly:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
|
#include <fstream>
#include <string>
#include <cstdlib>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
using namespace std;
ifstream fin1;
ofstream fout1;
ofstream fout2;
int main()
{
//declare stream objects
fin1.open("mutationlist.dat", ios::in);
fout1.open("common.out", ios::out);
fout2.open ("hits.out", ios::out);
//count number of lines in input file (mutationlist.dat)
int size = 0;
string line;
while ( !fin1.eof() )
{
getline(fin1, line);
size++;
}
//reset input file to beginning
fin1.clear();
fin1.seekg(0,std::ios::beg);
//Make arrays for each data type:
std::string lib[size], locus[size], gene[size], annot[size];
int pos[size], ilib[size], marker[size]; //marker = 0 (unassigned), 1 (assigned)
// read in data and populate arrays
for (int i=1; i<=size; ++i)
{
fin1 >> lib[i-1] >> pos[i-1] >> locus [i-1]>> gene[i-1]; //get first four data columns
std::getline (fin1, annot[i-1]); //get rest of first line
string library = lib[i-1].substr (3,1);
ilib[i-1] = atoi (library.c_str()); //obtain integer value of library
marker[i-1]=0; //assign unmarked value for all lines
if (ilib[i-1] < 3) //assign real value to library
{ilib[i-1] = 8;}
else
{ilib[i-1] = 13;}
}
//output to common.out for identical loci
for (int i=1; i<=size; ++i)
{
for (int j=i; j<size; ++j)
{
if ((pos[i-1] == pos[j]) && (ilib[i-1] != ilib[j])) //same mutation, different libraries, unmarked
{
marker[i-1]=1;
marker[j]=1;
fout1 << lib[i-1] << '\t'<< pos[i-1] << '\t'<< locus[i-1] << '\t'<< gene[i-1] << '\t'<< annot[i-1] << marker[i-1]<<endl;
fout1 << lib[j] << '\t'<< pos[j] << '\t'<< locus[j] << '\t'<< gene[j] << '\t'<< annot[j] << marker[j]<<endl;
}
}
}
//output to hits.out for mutations in same gene
for (int i=1; i<(size+1); ++i)
{
for (int j=i; j<size; ++j)
{
if ((locus[i-1]==locus[j]) && (pos[i-1] != pos[j]) && (ilib[i-1] != ilib[j]) && (marker[i-1] = 0) && (marker[j]=0)) //same locus different mutations, different libraries, unmarked
{
fout2 << lib[i-1] << '\t'<< pos[i-1] << '\t'<< locus[i-1] << '\t'<< gene[i-1] << '\t'<< annot[i-1] << endl;
fout2 << lib[j-1] << '\t'<< pos[j-1] << '\t'<< locus[j-1] << '\t'<< gene[j-1] << '\t'<< annot[j-1] << endl;
}
}
}
return 0;
}
|
I'm using this input file:
L4L1 3071 rcc00003 recF Line1 DNA replication should go to common
L3L3 3071 rcc00003 recF Line2 DNA replication should go to common
L4L1 3265 rcc00003 recF Line3 DNA replication should go to hits
L4L3 3266 rcc00003 recF Line4 DNA replication should go to hits
L3L2 3904 rcc00004 abcD Line5 DNA replication should be unmatched
L3L1 3904 rcc00004 abcD Line6 DNA replication should be unmatched
L3L1 3920 rcc00005 efgH Line7 DNA replication should go to hits twice
L3L3 6685 rcc00005 efgH Line8 nitrilotriacetate should go to hits twice
L2L4 6938 rcc00005 efgH Line9 nitrilotriacetate should go to hits twice
but the problem seems to be with the marker array.
I put some test output lines in to see where things are (these are omited above) but the first time around I succesfully change markers for lines 1 and 2 (lines 58 & 59) for that iteration from 0 to 1. But the next time this line is examined, the marker value is 0 again, so I skip output to hits.txt when I shouldn't.
Can you spot my error please?
Thanks
sicilicide