Removing Comments Using A State Machine

/* comments.cpp
**
** a program to remove comments from C or C++ source code
** using a state machine
**
** 15-Jul-2002 Jack Klein
**
** (C)Copyright 2002 by Jack Klein
** All rights reserved
**
** Permission granted for non-commercial use
*/

/* <iostream.h> is the pre-standard stream header and is absent from current
** compilers.  When the compiler reports ISO C++ support, use the standard
** headers and bring in the unqualified stream names this file relies on;
** otherwise fall back to the original header.
*/
#if defined(__cplusplus) && (__cplusplus >= 199711L)
#include <cstdio>     /* EOF */
#include <iostream>

using std::cerr;
using std::cin;
using std::cout;
using std::endl;
#else
#include <iostream.h>
#endif

/*
** Lexical states of the comment-stripping state machine.
*/
typedef enum {
   SOURCE,         /* ordinary code, outside any literal or comment     */
   STRING,         /* inside a "..." string literal                     */
   CHAR_LITERAL,   /* inside a '...' character literal                  */
   SLASH,          /* a '/' was read in code and is being held back     */
   LINE_COMMENT,   /* inside a // comment                               */
   BLOCK_COMMENT,  /* inside a slash-star comment                       */
   ASTERISK        /* inside a slash-star comment, just after a '*'     */
} State;

/*
** CommentState
**
** Character-at-a-time filter that copies source text to cout with the
** comments removed.  Feed every input character, in order, to
** ProcessStateEvent().  Code and literals are copied unchanged, each
** comment is replaced by a single space, and newlines that occur inside
** comments are still written so output lines stay aligned with input
** lines.  Problems found in the input are reported on cerr.
*/
class CommentState
{
   private:
   State current_state;   /* state the machine is in now                 */
   int last_char;         /* most recent character given to the machine  */
   long line_count;       /* 1-based number of the line being read       */
   bool escape_pending;   /* inside a literal: the previous character was
                             an unescaped backslash, so the next one is
                             escaped and cannot end the literal          */

   /* Write one character to cout.  Inserting the int itself with <<
      would print its decimal value rather than the character. */
   static void Emit(int ch) { cout.put(static_cast<char>(ch)); }

   void ProcessSourceChar(int);
   void ProcessLiteralChar(int, int, const char *);

   public:
   /* Start in ordinary code on line 1.  last_char starts as '\n' so that
      an empty input counts as ending on a newline. */
   CommentState()
      : current_state(SOURCE),
        last_char('\n'),
        line_count(1),
        escape_pending(false)
   {
   }
   State GetCurrentState() const { return current_state; }
   int GetLastChar() const { return last_char; }
   void ProcessStateEvent(int);
};

/*
** Handle one character read while in ordinary code: enter a literal,
** hold back a '/' that may begin a comment, or copy the character.
*/
void CommentState::ProcessSourceChar(int next_char)
{
   switch (next_char)
   {
      case '"':
         current_state = STRING;
         Emit(next_char);
         break;

      case '\'':
         current_state = CHAR_LITERAL;
         Emit(next_char);
         break;

      case '/':
         /* not written yet: the next character decides whether this
            is a division sign or the start of a comment */
         current_state = SLASH;
         break;

      default:
         Emit(next_char);
         break;
   }
}

/*
** Handle one character read inside a string or character literal.
**
** next_char  the character just read
** delimiter  the quote character that ends this kind of literal
** what       name of the literal kind, used in the warning text
**
** The character is always copied to the output.  An unescaped delimiter
** ends the literal.  An unescaped newline is reported on cerr and also
** ends the literal, so one unterminated literal cannot hide the rest of
** the file.
*/
void CommentState::ProcessLiteralChar(int next_char, int delimiter,
                                      const char *what)
{
   if (escape_pending)
   {
      /* this character is escaped, whatever it is; tracking the flag
         (instead of testing the previous character) keeps "\\" from
         hiding its own closing quote */
      escape_pending = false;
   }
   else if (next_char == '\\')
   {
      escape_pending = true;
   }
   else if (next_char == delimiter)
   {
      current_state = SOURCE;
   }
   else if (next_char == '\n')
   {
      cerr << "Warning line " << line_count << ": Newline in "
           << what << "!" << endl;
      current_state = SOURCE;
   }

   Emit(next_char);
}

/*
** Advance the state machine by one input character.
**
** next_char  the character just read from the input
**
** Writes whatever output the character produces to cout and records the
** character as the new last_char.
*/
void CommentState::ProcessStateEvent(int next_char)
{
   switch (current_state)
   {
      case SOURCE:
         ProcessSourceChar(next_char);
         break;

      case STRING:
         ProcessLiteralChar(next_char, '"', "string");
         break;

      case CHAR_LITERAL:
         ProcessLiteralChar(next_char, '\'', "character literal");
         break;

      case SLASH:
         if (next_char == '/')
         {
            current_state = LINE_COMMENT;
            Emit(' ');
         }
         else if (next_char == '*')
         {
            current_state = BLOCK_COMMENT;
            Emit(' ');
         }
         else
         {
            /* the held '/' was ordinary code; write it, then treat this
               character as code too so a quote right after the slash
               still opens its literal */
            Emit('/');
            current_state = SOURCE;
            ProcessSourceChar(next_char);
         }
         break;

      case LINE_COMMENT:
         if (next_char == '\n')
         {
            Emit('\n');
            /* a backslash immediately before the newline splices the
               next line onto this one, so the comment continues */
            if (last_char != '\\')
            {
               current_state = SOURCE;
            }
         }
         break;

      case BLOCK_COMMENT:
         if (next_char == '*')
         {
            current_state = ASTERISK;
         }
         else if (next_char == '\n')
         {
            Emit('\n');
         }
         break;

      case ASTERISK:
         if (next_char == '/')
         {
            current_state = SOURCE;
         }
         else if (next_char != '*')
         {
            /* a further '*' keeps us here; anything else is plain
               comment text again */
            current_state = BLOCK_COMMENT;
            if (next_char == '\n')
            {
               Emit('\n');
            }
         }
         break;
   }

   /* counted after the switch so a warning about this newline reports
      the line that the newline terminates */
   if (next_char == '\n')
   {
      ++line_count;
   }

   last_char = next_char;
}

/*
** Filter standard input through a CommentState, which writes the
** comment-free source to standard output, then report on standard error
** how the input ended.
*/
int main(void)
{
   CommentState FileState;
   char ch;

   /* get(char&) leaves the stream in a failed state at end of input, so
      no comparison against an EOF sentinel is needed */
   while (cin.get(ch))
   {
      /* pass the character as a non-negative value, the way the
         no-argument cin.get() would return it */
      FileState.ProcessStateEvent(static_cast<unsigned char>(ch));
   }

   /* the status line goes to cerr so that it does not become part of
      the filtered source on cout */
   cerr << "File ended ";

   if (FileState.GetLastChar() != '\n')
   {
      cerr << "without \\n";
   }
   else
   {
      switch (FileState.GetCurrentState())
      {
         case SOURCE:
            cerr << "correctly";
            break;
         case STRING:
            cerr << "in a string literal";
            break;
         case CHAR_LITERAL:
            cerr << "in a character literal";
            break;
         case BLOCK_COMMENT:
            cerr << "in a comment";
            break;
         default:
            cerr << "incorrectly";
            break;
      }
   }

   cerr << endl;
   return 0;
}

[ Top ]
[ C And C++ Main Page ]
[ Home ]

©2002 By Jack Klein. All Rights Reserved.
All trademarks are acknowledged to belong to their respective owners.