cpptohtml.cpp

This program does not build cross references or help files from your C or C++ source code. All it does is replace the characters < > " & and ´ in the source code, which have special meanings in HTML, with their HTML safe equivalents: &lt; &gt; &#34; &amp; and &acute;.

In addition, it can expand each hard tab character ('\t') into enough spaces to reach the next tab stop, with tab stops every 1 to 9 columns (4 by default), or leave the tabs in.

The result can be placed on an HTML page between <PRE> and </PRE> tags.



/* file cpp2html.cpp
*
*   18-Jan-1999
*   (C) 1999 by Jack Klein
*   All rights reserved
*
*   License granted for free non-commercial use
*
*
*   Operation:
*
*   simple filter to modify C and C++ source code
*   files into a form which can be included into a
*   preformatted block on an HTML page
*
*   replaces the characters '<', '>', '&', '"', and
*   '´' with their descriptive HTML strings
*
*   by default replaces each tab character with enough
*   space characters to simulate tab stops every four
*   columns, may be changed to any value from 1 to 9
*   by the -t1..9 command line switch or disabled with
*   the -t0 command line switch
*
*   the -n command line option adds a line number to the
*   beginning of each output line, right-aligned in a
*   three-column field and followed by ": ", for
*   annotation in the text; the numbers are not wrapped
*   in comments, so they must be removed before source
*   copied from a browser will compile
*/

#include <iostream.h>
#include <iomanip.h>
#include <fstream.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

// prefix written to cerr in front of each error message
const char app_err [] = "cpp2html error: ";

// command line summary, printed when the source and destination
// file names are not both present after the options
const char usage [] =
"usage: cpp2html [-n] [-t0..9] source_file_name dest_file_name\n"
"    -n:     add line numbers to output file\n"
"    -t0:    disable replacement of tabs with spaces\n"
"    -t1..9: replace tabs with 1..9 spaces [default 4]\n";

int main(int argc, char **argv)
{
    int                 arg_count;
    int                 line_numbers = 0;
    int                 line_count = 0;
    int                 last_char = '\n';
    int                 tab_size = 4;
    int                 column;

    // check for command line options
    for (arg_count = 1; arg_count < argc; ++arg_count)
    {
        if (argv[arg_count][0] == '-')
        {
            switch (argv[arg_count][1])
            {
                case 'n':
                case 'N':
                    cout << argv[arg_count] << " line numbers on" << endl;
                    line_numbers = 1;
                    break;
                case 't':
                case 'T':
                    if (isdigit(argv[arg_count][2]))
                    {
                        tab_size = argv[arg_count][2] - '0';
                    }
                    else
                    {
                        cout << "invalid tab size " << argv[arg_count] << endl;
                    }
                    cout << "tab size " << tab_size << endl;
                    break;
                default:
                    cout << "invalid argument " << argv[arg_count] << endl;
            }
        }
        else
        {
            break;
        }
    }

    // after options (if any), there must be two parameters
    // left, the source and destination file names
    if ((argc - arg_count) < 2)
    {
        cout << usage;
        exit(EXIT_FAILURE);
    }

    // open the input file
    ifstream in(argv[arg_count], ios::in);
    if (in == 0)
    {
        cerr << app_err << " can't open "
        << argv [arg_count] << endl;
        exit(EXIT_FAILURE);
    }

    // open the output file
    ofstream out(argv [++arg_count], ios::out);
    if (out == 0)
    {
        cerr << app_err << " can't make "
        << argv [arg_count] << endl;
        exit(EXIT_FAILURE);
    }

    // read and process the data
    int data;

    while ((data = in.get()) != EOF)
    {
        if (last_char == '\n')
        {
            column = 0;
            if (line_numbers)
            {
                out << setw(3)
                << ++line_count << ": ";
            }
        }

        last_char = data;

        switch (data)
        {
            case '"':
                out << "&#34;";
                break;
            case '&':
                out << "&amp;";
                break;
            case '<':
                out << "&lt;";
                break;
            case '>':
                out << "&gt;";
                break;
            case '´':
                out << "&acute;";
                break;
            case '\t':
                if (tab_size > 0)
                {
                    do
                    {
                        out << ' ';
                    } while (++column % tab_size);
                    --column;
                }
                else
                {
                    out << '\t';
                }
                break;
            default:
                out << (char)data;
                break;
        }
        ++column;
    }

    in.close();
    out.close();

    cout << "\nall done\n" << endl;

    return EXIT_SUCCESS;
}

Note: I updated this page on May 16, because I discovered that the &quot; HTML entity is actually invalid in HTML 3.2!

Having &quot; on a page and validating it with W3C HTML Validation Service returns this message:

Error at line 210: general entity "quot" not defined
and no default entity (explanation...)
Clicking on the explanation link yields the following:
The entity &quot; in conjunction with the HTML 3.2 DOCTYPE. This entity was accidentally omitted from the most recent version of the HTML 3.2 DTD. You should be able to ignore this error safely, though if you wish, you can replace &quot; with the equivalent character entity &#34;.
I am updating all of my pages to validate with this service. On this page, in addition to changing all of the &quot; escape sequences to &#34;, I changed the source code to generate this sequence as well.


[ Top ]
[ C And C++ Code ]
[ Home ]

This page validated by Valid HTML 3.2! W3C HTML Validation Service.

©1999 By Jack Klein. All Rights Reserved.
All trademarks are acknowledged to belong to their respective owners.
Updated 16-May-1999