1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
|
#include "histogram.h"
//#include "arrayUtils.h"
#include <string>
using namespace std;
struct CountedLocations {
string Location;
int numHits;
};
// Extract the quoted request from a line containing a log entry
string extractTheRequest(string logEntry)
{
string::size_type requestStart = logEntry.find('"');
string::size_type requestEnd = logEntry.rfind('"');
if (requestStart != requestEnd)
{
return logEntry.substr(requestStart+1, requestEnd-requestStart-1);
}
else
return "";
}
template <typename T>
void addElement (T* array, int& size, int index, T value)
{
// Make room for the insertion
int toBeMoved = size - 1;
while (toBeMoved >= index) {
array[toBeMoved+1] = array[toBeMoved];
--toBeMoved;
}
// Insert the new value
array[index] = value;
++size;
}
int binarySearch(const CountedLocations list[], int listLength, string searchItem)
{
int first = 0;
int last = listLength - 1;
int mid;
string midFind;
bool found = false;
while (first <= last && !found)
{
mid = (first + last) / 2;
midFind = list[mid].Location;
if (midFind == searchItem)
found = true;
else
if (searchItem < midFind)
last = mid - 1;
else
first = mid + 1;
}
if (found)
return mid;
else
return -1;
}
// Check to see if this is a GET request
bool isAGet (string request)
{
return request.size() > 3 && request.substr(0,4) == "GET ";
}
// Extract the locator part of a GET request
string extractLocator (string request)
{
// strip off the GET
string::size_type locatorStart = 3;
// Skip any blanks
while (request[locatorStart] == ' ')
++ locatorStart;
// The locator ends at the first blank or ? after that.
string::size_type locatorEnd = locatorStart;
while (request[locatorEnd] != ' ' && request[locatorEnd] != '?')
++ locatorEnd;
string locator = request.substr(locatorStart, locatorEnd-locatorStart);
return locator;
}
//template <typename T>
int addInOrder (CountedLocations array[], int& size, string value)
{
// Make room for the insertion
int toBeMoved = size - 1;
while (toBeMoved >= 0 && value < array[toBeMoved].Location) {
array[toBeMoved+1] = array[toBeMoved];
--toBeMoved;
}
// Insert the new value
array[toBeMoved+1].Location = value;
++size;
return toBeMoved+1;
}
// Read log entries from the provided input stream, extracting the request
// portion from each. If the request is a GET, extract the locator (throwing
// away any part after a '?') and update a count of how many times that locator
// has appeared.
//
// Write the resulting wlocators and counts (in alphabetic order by locator)
// in CSV format to the output stream.
//
// - Assume that the input contains a maximum of MaxPages distinct locators.
// If more distinct locators than this are actually
// encountered, write nothing to the output stream but print an error
// message on the standard error stream.
void histogram(const int MaxPages, istream& input, ostream& output)
{
// Step 1 - set up the data
// Data should be stored in two parallel arrays, locators and counts.
// locators[i] will be the i_th locator string and counts[i] will be the
// count of how many times that particular locator has been requested.
CountedLocations C [MaxPages];
int nLocators = 0;
// Step 2 - read and count the requested locators
string logEntry;
getline (input, logEntry);
while (input)
{
string request = extractTheRequest(logEntry);
if (isAGet(request))
{
string locator = extractLocator(request);
int position = binarySearch (C, nLocators, locator);
if (position >= 0)
{
// We found this locator already in the array.
// Increment its count
C[position].numHits+=1;
}
else
{
if (nLocators < MaxPages)
{
// This is a new locator. Add it.
position = addInOrder (C, nLocators, locator);
// And add a count of one at the corresponding position
// in the counts array
//int nCounts = nLocators - 1;
C[position].numHits = 1;
//addElement (CountedLocations, nCounts, position, 1);
}
else
{
// Not enough room in the arrays
++nLocators;
}
}
}
getline (input, logEntry);
}
// Step 3 - write the output report
string Location;
int Hits;
if (nLocators <= MaxPages)
{
for (int i = 0; i < nLocators; ++i)
{
Location = C[i].Location;
Hits = C[i].numHits;
}
output << "\"" << Location << "\"," << Hits << endl;
}
else
{
cerr << "Input file contains more than " << MaxPages << " locators." << endl;
}
// Step 4 - cleanup
}
|