Exercise 6-6. Implement a simple version of the
#defineprocessor (i.e., no arguments) suitable for use with C programs, based on the routines of this section. You may also findgetchandungetchuseful.
The processor reads C source line by line. When it sees #define NAME replacement it calls install from the hash table (Exercise 6-5). For every other token it reads, if the token matches a defined name it outputs the replacement text; otherwise it outputs the token unchanged. Whitespace and non-identifier characters are passed through verbatim.
Solution
/* K&R Exercise 6-6 — simple #define preprocessor (no arguments)
* Compile: gcc -ansi -Wall ex6-6.c -o ex6-6
* Usage: ./ex6-6 < source.c */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* ---- hash table (from K&R Section 6.6 + Exercise 6-5) ---- */
#define HASHSIZE 101
struct nlist {
struct nlist *next;
char *name;
char *defn;
};
static struct nlist *hashtab[HASHSIZE];
static unsigned int hash(const char *s)
{
unsigned int v = 0;
for ( ; *s; s++)
v = (unsigned int)*s + 31u * v;
return v % HASHSIZE;
}
static struct nlist *lookup(const char *s)
{
struct nlist *np;
for (np = hashtab[hash(s)]; np; np = np->next)
if (strcmp(s, np->name) == 0)
return np;
return NULL;
}
static struct nlist *install(const char *name, const char *defn)
{
struct nlist *np;
unsigned int hval;
if ((np = lookup(name)) == NULL) {
np = (struct nlist *)malloc(sizeof(*np));
if (!np) return NULL;
np->name = (char *)malloc(strlen(name) + 1);
if (!np->name) { free(np); return NULL; }
strcpy(np->name, name);
hval = hash(name);
np->next = hashtab[hval];
hashtab[hval] = np;
np->defn = NULL;
} else {
free(np->defn);
}
np->defn = (char *)malloc(strlen(defn) + 1);
if (!np->defn) return NULL;
strcpy(np->defn, defn);
return np;
}
/* ---- one-character pushback ---- */
static int _buf = EOF;
static int getch(void) { int c = _buf; _buf = EOF; return (c != EOF) ? c : getchar(); }
static void ungetch(int c) { _buf = c; }
/* ---- preprocessor ---- */
/* Read one identifier into word[], return length */
static int read_ident(char *word, int lim)
{
int c;
char *w = word;
while (--lim > 0) {
c = getch();
if (!isalnum(c) && c != '_') { ungetch(c); break; }
*w++ = c;
}
*w = '\0';
return (int)(w - word);
}
/* Skip horizontal whitespace, return first non-space char (pushed back) */
static void skip_hspace(void)
{
int c;
while ((c = getch()) == ' ' || c == '\t')
;
ungetch(c);
}
/* Read from current position to end-of-line into buf (strips trailing space) */
static void read_to_eol(char *buf, int lim)
{
int c;
char *p = buf, *end = buf + lim - 1;
while ((c = getch()) != '\n' && c != EOF && p < end)
*p++ = c;
if (c == '\n') ungetch(c); /* put \n back so main loop sees it */
/* strip trailing whitespace */
while (p > buf && (*(p-1) == ' ' || *(p-1) == '\t'))
p--;
*p = '\0';
}
int main(void)
{
int c;
char word[512], defn[512];
struct nlist *np;
while ((c = getch()) != EOF) {
/* ---- preprocessor directive ---- */
if (c == '#') {
/* read directive name */
skip_hspace();
read_ident(word, sizeof word);
if (strcmp(word, "define") == 0) {
/* read the macro name */
skip_hspace();
read_ident(word, sizeof word);
if (word[0] == '\0') { putchar('#'); continue; }
/* read the replacement text */
skip_hspace();
read_to_eol(defn, sizeof defn);
install(word, defn);
/* consume the newline that read_to_eol pushed back */
getch();
putchar('\n');
} else {
/* not #define — pass through unchanged */
printf("#%s", word);
}
continue;
}
/* ---- identifier: possibly a macro name ---- */
if (isalpha(c) || c == '_') {
word[0] = c;
read_ident(word + 1, (int)sizeof(word) - 1);
if ((np = lookup(word)) != NULL)
fputs(np->defn, stdout);
else
fputs(word, stdout);
continue;
}
/* ---- everything else: pass through ---- */
putchar(c);
}
return 0;
}
Compile and Test
gcc -ansi -Wall ex6-6.c -o ex6-6
printf "#define MAX 100\n#define PI 3.14159\nif (x > MAX) y = PI;\n" | ./ex6-6
Expected Output
if (x > 100) y = 3.14159;
(The blank first line is from the #define directive being consumed and replaced with a newline.)
Test with a Longer Input
printf "#define BUFSIZE 512\n#define TRUE 1\n#define FALSE 0\nchar buf[BUFSIZE];\nint flag = TRUE;\nif (!flag) flag = FALSE;\n" | ./ex6-6
Expected Output
char buf[512]; int flag = 1; if (!flag) flag = 0;
Design Notes
- Character-at-a-time scanning with pushback —
getch/ungetchallow one character of lookahead; enough to check whether a#starts a directive orisalphastarts an identifier - What this does NOT handle — macro arguments,
#ifdef/#ifndef, multi-line definitions (backslash continuation), token pasting (##), and nested macros. A full C preprocessor requires multiple passes and a recursive expansion engine - Passing non-define directives through — lines like
#includeare printed unchanged; the scanner reads the directive word and re-emits#word, then the rest of the line flows through the character-by-character path
What This Exercise Teaches
- Building on previous exercises — Exercise 6-6 assembles the hash table (Section 6.6),
getch/ungetch(Section 4.3), and a simple token scanner into a working tool; this is how real tools are built from composable pieces - Lexer structure — the main loop is a classic lexer: switch on the first character to decide what kind of token follows, then dispatch to a handler
- Scope of a “simple” implementation — knowing what you are NOT handling is as important as what you are; the design notes above make the limitations explicit
Set Up Your C Environment
← Exercise 6-5 |
Chapter 6 Solutions |
Chapter 7 Solutions →
Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie