K&R C Programs Exercise 6-6

Exercise 6-6. Implement a simple version of the #define processor (i.e., no arguments) suitable for use with C programs, based on the routines of this section. You may also find getch and ungetch useful.

The processor reads C source line by line. When it sees #define NAME replacement it calls install from the hash table (Exercise 6-5). For every other token it reads, if the token matches a defined name it outputs the replacement text; otherwise it outputs the token unchanged. Whitespace and non-identifier characters are passed through verbatim.

Solution

/* K&R Exercise 6-6 — simple #define preprocessor (no arguments)
 * Compile: gcc -ansi -Wall ex6-6.c -o ex6-6
 * Usage:   ./ex6-6 < source.c */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

/* ---- hash table (from K&R Section 6.6 + Exercise 6-5) ---- */
#define HASHSIZE 101

struct nlist {
    struct nlist *next;
    char         *name;
    char         *defn;
};

static struct nlist *hashtab[HASHSIZE];

static unsigned int hash(const char *s)
{
    unsigned int v = 0;
    for ( ; *s; s++)
        v = (unsigned int)*s + 31u * v;
    return v % HASHSIZE;
}

static struct nlist *lookup(const char *s)
{
    struct nlist *np;
    for (np = hashtab[hash(s)]; np; np = np->next)
        if (strcmp(s, np->name) == 0)
            return np;
    return NULL;
}

static struct nlist *install(const char *name, const char *defn)
{
    struct nlist *np;
    unsigned int  hval;
    if ((np = lookup(name)) == NULL) {
        np = (struct nlist *)malloc(sizeof(*np));
        if (!np) return NULL;
        np->name = (char *)malloc(strlen(name) + 1);
        if (!np->name) { free(np); return NULL; }
        strcpy(np->name, name);
        hval = hash(name);
        np->next    = hashtab[hval];
        hashtab[hval] = np;
        np->defn = NULL;
    } else {
        free(np->defn);
    }
    np->defn = (char *)malloc(strlen(defn) + 1);
    if (!np->defn) return NULL;
    strcpy(np->defn, defn);
    return np;
}

/* ---- one-character pushback ---- */
static int _buf = EOF;
static int  getch(void)    { int c = _buf; _buf = EOF; return (c != EOF) ? c : getchar(); }
static void ungetch(int c) { _buf = c; }

/* ---- preprocessor ---- */

/* Read one identifier into word[], return length */
static int read_ident(char *word, int lim)
{
    int c;
    char *w = word;
    while (--lim > 0) {
        c = getch();
        if (!isalnum(c) && c != '_') { ungetch(c); break; }
        *w++ = c;
    }
    *w = '\0';
    return (int)(w - word);
}

/* Skip horizontal whitespace, return first non-space char (pushed back) */
static void skip_hspace(void)
{
    int c;
    while ((c = getch()) == ' ' || c == '\t')
        ;
    ungetch(c);
}

/* Read from current position to end-of-line into buf (strips trailing space) */
static void read_to_eol(char *buf, int lim)
{
    int c;
    char *p = buf, *end = buf + lim - 1;
    while ((c = getch()) != '\n' && c != EOF && p < end)
        *p++ = c;
    if (c == '\n') ungetch(c);   /* put \n back so main loop sees it */
    /* strip trailing whitespace */
    while (p > buf && (*(p-1) == ' ' || *(p-1) == '\t'))
        p--;
    *p = '\0';
}

int main(void)
{
    int c;
    char word[512], defn[512];
    struct nlist *np;

    while ((c = getch()) != EOF) {

        /* ---- preprocessor directive ---- */
        if (c == '#') {
            /* read directive name */
            skip_hspace();
            read_ident(word, sizeof word);

            if (strcmp(word, "define") == 0) {
                /* read the macro name */
                skip_hspace();
                read_ident(word, sizeof word);
                if (word[0] == '\0') { putchar('#'); continue; }

                /* read the replacement text */
                skip_hspace();
                read_to_eol(defn, sizeof defn);

                install(word, defn);
                /* consume the newline that read_to_eol pushed back */
                getch();
                putchar('\n');
            } else {
                /* not #define — pass through unchanged */
                printf("#%s", word);
            }
            continue;
        }

        /* ---- identifier: possibly a macro name ---- */
        if (isalpha(c) || c == '_') {
            word[0] = c;
            read_ident(word + 1, (int)sizeof(word) - 1);
            if ((np = lookup(word)) != NULL)
                fputs(np->defn, stdout);
            else
                fputs(word, stdout);
            continue;
        }

        /* ---- everything else: pass through ---- */
        putchar(c);
    }
    return 0;
}

Compile and Test

gcc -ansi -Wall ex6-6.c -o ex6-6

printf "#define MAX 100\n#define PI 3.14159\nif (x > MAX) y = PI;\n" | ./ex6-6

Expected Output


if (x > 100) y = 3.14159;

(The blank first line is from the #define directive being consumed and replaced with a newline.)

Test with a Longer Input

printf "#define BUFSIZE 512\n#define TRUE 1\n#define FALSE 0\nchar buf[BUFSIZE];\nint flag = TRUE;\nif (!flag) flag = FALSE;\n" | ./ex6-6

Expected Output


char buf[512];
int flag = 1;
if (!flag) flag = 0;

Design Notes

  • Character-at-a-time scanning with pushbackgetch/ungetch allow one character of lookahead; enough to check whether a # starts a directive or isalpha starts an identifier
  • What this does NOT handle — macro arguments, #ifdef/#ifndef, multi-line definitions (backslash continuation), token pasting (##), and nested macros. A full C preprocessor requires multiple passes and a recursive expansion engine
  • Passing non-define directives through — lines like #include are printed unchanged; the scanner reads the directive word and re-emits #word, then the rest of the line flows through the character-by-character path

What This Exercise Teaches

  • Building on previous exercises — Exercise 6-6 assembles the hash table (Section 6.6), getch/ungetch (Section 4.3), and a simple token scanner into a working tool; this is how real tools are built from composable pieces
  • Lexer structure — the main loop is a classic lexer: switch on the first character to decide what kind of token follows, then dispatch to a handler
  • Scope of a “simple” implementation — knowing what you are NOT handling is as important as what you are; the design notes above make the limitations explicit

Set Up Your C Environment

← Exercise 6-5  | 
Chapter 6 Solutions  | 
Chapter 7 Solutions →

Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>