How To: Parse A Text File In C++

This example shows how to read a text file line by line and split each line into its individual words (tokens separated by blanks).
#include <iostream>
using std::cout;
using std::endl;

#include <fstream>
using std::ifstream;

#include <cstring>

// Size of the per-line character buffer in main(); it is also the count
// passed to getline(), so a line may hold at most MAX_CHARS_PER_LINE - 1
// characters plus the terminating null character.
const int MAX_CHARS_PER_LINE = 512;
// Capacity of the token-pointer array in main(): at most this many tokens
// are recorded for a single line.
const int MAX_TOKENS_PER_LINE = 20;
// Delimiter set handed to strtok(): tokens are separated by space characters.
const char* const DELIMITER = " ";

// Reads "data.txt" line by line, splits each line into blank-delimited
// tokens, and prints the tokens of every line followed by an empty line.
//
// Returns 0 when the whole file was read. Returns 1 when the file could not
// be opened, or when reading stopped before end-of-file (for example on a
// line that does not fit into MAX_CHARS_PER_LINE - 1 characters).
int main()
{
  // create a file-reading object and open the input file
  ifstream fin("data.txt");
  if (!fin.good())
    return 1; // exit if file not found

  // Buffer that receives one line at a time.
  char buf[MAX_CHARS_PER_LINE];

  // The read itself is the loop condition. Testing eof() *before* reading
  // would process one phantom empty line after the final newline (or for an
  // empty file), and would loop forever once an over-long line sets failbit,
  // because eof() never becomes true in that state.
  while (fin.getline(buf, MAX_CHARS_PER_LINE))
  {
    // addresses of the tokens inside buf; unused slots stay null
    const char* token[MAX_TOKENS_PER_LINE] = {NULL};

    // Split the line in place: strtok() overwrites each delimiter it
    // consumes with '\0' and returns a pointer into buf. Tokens beyond
    // MAX_TOKENS_PER_LINE are not recorded.
    int n = 0; // number of tokens found on this line
    char* next = strtok(buf, DELIMITER); // first token, null if line is blank
    while (next != NULL && n < MAX_TOKENS_PER_LINE)
    {
      token[n++] = next;
      next = strtok(NULL, DELIMITER); // subsequent tokens
    }

    // process (print) the tokens
    for (int i = 0; i < n; i++)
      cout << "Token[" << i << "] = " << token[i] << '\n';
    cout << '\n';
  }

  // A normal finish leaves eofbit set by the final, failed read. Anything
  // else means reading stopped early (over-long line or I/O error).
  return fin.eof() ? 0 : 1;
}

The Input File:


COURSE    DESCRIPTION: 
This course provides detailed coverage of the concepts and syntax of the C++ Language.
Coverage includes inheritance, overloaded operators, overloaded default operators,
virtual functions, memory management, files, streams, templates, and exceptions.

The Output:


Token[0] = COURSE
Token[1] = DESCRIPTION:

Token[0] = This
Token[1] = course
Token[2] = provides
Token[3] = detailed
Token[4] = coverage
Token[5] = of
Token[6] = the
Token[7] = concepts
Token[8] = and
Token[9] = syntax
Token[10] = of
Token[11] = the
Token[12] = C++
Token[13] = Language.

Token[0] = Coverage
Token[1] = includes
Token[2] = inheritance,
Token[3] = overloaded
Token[4] = operators,
Token[5] = overloaded
Token[6] = default
Token[7] = operators,

Token[0] = virtual
Token[1] = functions,
Token[2] = memory
Token[3] = management,
Token[4] = files,
Token[5] = streams,
Token[6] = templates,
Token[7] = and
Token[8] = exceptions.