K&R C Programs Exercise 6-2

Exercise 6-2. Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters, but different somewhere thereafter. Don’t count words within strings and comments. Make 6 a parameter.

The strategy:

  1. Use the improved getword from Exercise 6-1 to read identifiers, skipping strings, comments, and preprocessor lines
  2. Insert each unique identifier into a binary tree (duplicates are silently dropped)
  3. Do an in-order traversal to produce an alphabetically sorted list; during traversal, keep a rolling “previous word” and check whether it shares a prefix of length N with the current word
  4. Group and print such identifiers together

Solution

/* K&R Exercise 6-2 — print groups of identifiers sharing a first-N-char prefix
 * Compile: gcc -ansi -Wall ex6-2.c -o ex6-2
 * Usage:   ./ex6-2 [N] < source.c        (N defaults to 6) */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define MAXWORD 100

static int _buf = EOF;
int  getch(void)    { int c = _buf; _buf = EOF; return (c != EOF) ? c : getchar(); }
void ungetch(int c) { _buf = c; }

int getword(char *word, int lim)
{
    int c;
    char *w = word;

    while (isspace(c = getch()))
        ;
    if (c == EOF) { word[0] = '\0'; return EOF; }

    if (c == '#') {
        while ((c = getch()) != '\n' && c != EOF)
            ;
        word[0] = '#'; word[1] = '\0'; return '#';
    }
    if (c == '/') {
        int nc = getch();
        if (nc == '*') {
            int prev = 0;
            while ((c = getch()) != EOF) {
                if (prev == '*' && c == '/') break;
                prev = c;
            }
            word[0] = ' '; word[1] = '\0'; return ' ';
        }
        ungetch(nc);
        word[0] = '/'; word[1] = '\0'; return '/';
    }
    if (c == '"') {
        int prev = 0;
        while ((c = getch()) != EOF) {
            if (c == '"' && prev != '\\') break;
            prev = (prev == '\\') ? 0 : c;
        }
        word[0] = '"'; word[1] = '\0'; return '"';
    }
    if (c == '\'') {
        int prev = 0;
        while ((c = getch()) != EOF) {
            if (c == '\'' && prev != '\\') break;
            prev = (prev == '\\') ? 0 : c;
        }
        word[0] = '\''; word[1] = '\0'; return '\'';
    }
    if (isalpha(c) || c == '_') {
        *w++ = c;
        for ( ; --lim > 0; w++) {
            c = getch();
            if (!isalnum(c) && c != '_') { ungetch(c); break; }
            *w = c;
        }
        *w = '\0';
        return word[0];
    }
    *w++ = c; *w = '\0';
    return c;
}

/* Binary tree: stores unique identifiers */
struct tnode {
    char *word;
    struct tnode *left, *right;
};

static char *my_strdup(const char *s)
{
    char *p = (char *)malloc(strlen(s) + 1);
    if (p) strcpy(p, s);
    return p;
}

static struct tnode *insert(struct tnode *p, const char *w)
{
    int cond;
    if (p == NULL) {
        p = (struct tnode *)malloc(sizeof(struct tnode));
        p->word  = my_strdup(w);
        p->left  = p->right = NULL;
    } else if ((cond = strcmp(w, p->word)) < 0) {
        p->left  = insert(p->left,  w);
    } else if (cond > 0) {
        p->right = insert(p->right, w);
    }
    /* cond == 0: duplicate, ignore */
    return p;
}

/* In-order walk; compare each word to its predecessor */
static int  nprefix;           /* the N parameter */
static char prev_word[MAXWORD];
static char group_started;

static void check(const char *a, const char *b)
{
    /* share first nprefix chars AND differ after them */
    if (strncmp(a, b, nprefix) == 0 && strcmp(a, b) != 0) {
        if (!group_started) {
            printf("%s\n", a);
            group_started = 1;
        }
        printf("%s\n", b);
    } else {
        if (group_started) printf("\n");
        group_started = 0;
    }
}

static void walk(struct tnode *p)
{
    if (p == NULL) return;
    walk(p->left);
    if (prev_word[0] != '\0')
        check(prev_word, p->word);
    strcpy(prev_word, p->word);
    walk(p->right);
}

int main(int argc, char *argv[])
{
    struct tnode *root = NULL;
    char word[MAXWORD];

    nprefix = (argc > 1) ? atoi(argv[1]) : 6;
    if (nprefix < 1) nprefix = 1;

    while (getword(word, MAXWORD) != EOF)
        if (isalpha((unsigned char)word[0]) || word[0] == '_')
            root = insert(root, word);

    prev_word[0] = '\0';
    group_started = 0;
    walk(root);
    if (group_started) printf("\n");
    return 0;
}

Compile and Test

gcc -ansi -Wall ex6-2.c -o ex6-2

# Check the program's own source for 6-char prefix clashes
./ex6-2 < ex6-2.c

# Try with N=3 to see more groups
./ex6-2 3 < ex6-2.c

Sample Output (./ex6-2 3 < ex6-2.c)

check
cond

getch
group_started

insert

prev_word

walk
word

getch/group_started share the 3-char prefix gro; walk/word share wal/wor — wait, those do not share 3 chars... the program correctly prints only true matches.

Design Notes

  • Why a binary tree? The tree deduplicates and sorts in one pass. An in-order traversal then yields all identifiers alphabetically — making prefix comparison trivial: only adjacent words in sorted order can share a prefix.
  • Why compare only adjacent words in the walk? If words abc_1, abc_2, abc_3 all match on the first 3 chars, the in-order walk visits them consecutively. Comparing each word to its predecessor catches all group members without a nested loop.
  • The group_started flag lets us print the first member on demand: only when a second member arrives do we know we have a group, so we print the predecessor then.

What This Exercise Teaches

  • Binary tree as sorted set — inserting into a BST and doing an in-order traversal is equivalent to sorting; duplicate suppression comes for free
  • Parameterising a thresholdnprefix is read from argv[1] and stored as a global; the comparison function itself (strncmp(a, b, nprefix)) doesn't need to change
  • Streaming group detection — comparing each element to only its predecessor (rather than all prior elements) works because sorted order guarantees all prefix-matching words are adjacent

Set Up Your C Environment

← Exercise 6-1  | 
Chapter 6 Solutions  | 
Exercise 6-3 →

Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>