How To: Parse A Text File In C++
This example shows how to read a text file line by line and split each line
into its individual words (tokens delimited by blanks).
#include <iostream>
using std::cout;
using std::endl;
#include <fstream>
using std::ifstream;
#include <cstring>
// Size in bytes of the buffer that receives one line of input; getline()
// reserves one of these bytes for the terminating '\0'.
int const MAX_CHARS_PER_LINE = 512;

// Capacity of the per-line token table.
int const MAX_TOKENS_PER_LINE = 20;

// Set of separator characters handed to strtok(): a single blank.
char const* const DELIMITER = " ";
// Reads "data.txt" line by line, splits every line into blank-delimited
// tokens and prints the tokens of each line followed by an empty line.
//
// At most MAX_TOKENS_PER_LINE tokens are kept per line; any further tokens
// on the same line are ignored.
//
// Returns 0 on success. Returns 1 if the file cannot be opened, or if
// reading stops before end-of-file (a line that does not fit into
// MAX_CHARS_PER_LINE - 1 characters, or an I/O error).
int main()
{
    // create a file-reading object and open the file
    ifstream fin;
    fin.open("data.txt");
    if (!fin.good())
        return 1; // exit if file not found

    // Read each line of the file. The loop tests the result of getline()
    // itself instead of checking fin.eof() before the read: that way no
    // phantom empty line is processed after the last real one, and a failed
    // read (which sets failbit without ever reaching EOF) cannot spin forever.
    char buf[MAX_CHARS_PER_LINE];
    while (fin.getline(buf, MAX_CHARS_PER_LINE))
    {
        // addresses of the tokens inside buf; unused slots stay 0
        const char* token[MAX_TOKENS_PER_LINE] = {0};
        int n = 0; // number of tokens found on this line

        // strtok() overwrites each delimiter in buf with '\0' and returns a
        // pointer to the start of the next token, or 0 when none are left.
        for (char* word = std::strtok(buf, DELIMITER);
             word != 0 && n < MAX_TOKENS_PER_LINE;
             word = std::strtok(0, DELIMITER))
        {
            token[n++] = word;
        }

        // process (print) the tokens; '\n' avoids flushing after every token
        for (int i = 0; i < n; i++)
            cout << "Token[" << i << "] = " << token[i] << '\n';
        cout << '\n';
    }

    // The loop ends on the first failed read. Anything other than a clean
    // end-of-file means part of the input was not processed.
    if (!fin.eof())
    {
        std::cerr << "error: stopped reading data.txt before end of file "
                     "(line too long or I/O error)\n";
        return 1;
    }

    return 0;
}
The Input File:
COURSE DESCRIPTION:
This course provides detailed coverage of the concepts and syntax of the C++ Language.
Coverage includes inheritance, overloaded operators, overloaded default operators,
virtual functions, memory management, files, streams, templates, and exceptions.
The Output:
Token[0] = COURSE
Token[1] = DESCRIPTION:
Token[0] = This
Token[1] = course
Token[2] = provides
Token[3] = detailed
Token[4] = coverage
Token[5] = of
Token[6] = the
Token[7] = concepts
Token[8] = and
Token[9] = syntax
Token[10] = of
Token[11] = the
Token[12] = C++
Token[13] = Language.
Token[0] = Coverage
Token[1] = includes
Token[2] = inheritance,
Token[3] = overloaded
Token[4] = operators,
Token[5] = overloaded
Token[6] = default
Token[7] = operators,
Token[0] = virtual
Token[1] = functions,
Token[2] = memory
Token[3] = management,
Token[4] = files,
Token[5] = streams,
Token[6] = templates,
Token[7] = and
Token[8] = exceptions.