Exercise 6-2. Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters, but different somewhere thereafter. Don’t count words within strings and comments. Make 6 a parameter.
The strategy:
- Use the improved
getwordfrom Exercise 6-1 to read identifiers, skipping strings, comments, and preprocessor lines - Insert each unique identifier into a binary tree (duplicates are silently dropped)
- Do an in-order traversal to produce an alphabetically sorted list; during traversal, keep a rolling “previous word” and check whether it shares a prefix of length
Nwith the current word - Group and print such identifiers together
Solution
/* K&R Exercise 6-2 — print groups of identifiers sharing a first-N-char prefix
* Compile: gcc -ansi -Wall ex6-2.c -o ex6-2
* Usage: ./ex6-2 [N] < source.c (N defaults to 6) */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define MAXWORD 100
static int _buf = EOF;
int getch(void) { int c = _buf; _buf = EOF; return (c != EOF) ? c : getchar(); }
void ungetch(int c) { _buf = c; }
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
;
if (c == EOF) { word[0] = '\0'; return EOF; }
if (c == '#') {
while ((c = getch()) != '\n' && c != EOF)
;
word[0] = '#'; word[1] = '\0'; return '#';
}
if (c == '/') {
int nc = getch();
if (nc == '*') {
int prev = 0;
while ((c = getch()) != EOF) {
if (prev == '*' && c == '/') break;
prev = c;
}
word[0] = ' '; word[1] = '\0'; return ' ';
}
ungetch(nc);
word[0] = '/'; word[1] = '\0'; return '/';
}
if (c == '"') {
int prev = 0;
while ((c = getch()) != EOF) {
if (c == '"' && prev != '\\') break;
prev = (prev == '\\') ? 0 : c;
}
word[0] = '"'; word[1] = '\0'; return '"';
}
if (c == '\'') {
int prev = 0;
while ((c = getch()) != EOF) {
if (c == '\'' && prev != '\\') break;
prev = (prev == '\\') ? 0 : c;
}
word[0] = '\''; word[1] = '\0'; return '\'';
}
if (isalpha(c) || c == '_') {
*w++ = c;
for ( ; --lim > 0; w++) {
c = getch();
if (!isalnum(c) && c != '_') { ungetch(c); break; }
*w = c;
}
*w = '\0';
return word[0];
}
*w++ = c; *w = '\0';
return c;
}
/* Binary tree: stores unique identifiers */
struct tnode {
char *word;
struct tnode *left, *right;
};
static char *my_strdup(const char *s)
{
char *p = (char *)malloc(strlen(s) + 1);
if (p) strcpy(p, s);
return p;
}
static struct tnode *insert(struct tnode *p, const char *w)
{
int cond;
if (p == NULL) {
p = (struct tnode *)malloc(sizeof(struct tnode));
p->word = my_strdup(w);
p->left = p->right = NULL;
} else if ((cond = strcmp(w, p->word)) < 0) {
p->left = insert(p->left, w);
} else if (cond > 0) {
p->right = insert(p->right, w);
}
/* cond == 0: duplicate, ignore */
return p;
}
/* In-order walk; compare each word to its predecessor */
static int nprefix; /* the N parameter */
static char prev_word[MAXWORD];
static char group_started;
static void check(const char *a, const char *b)
{
/* share first nprefix chars AND differ after them */
if (strncmp(a, b, nprefix) == 0 && strcmp(a, b) != 0) {
if (!group_started) {
printf("%s\n", a);
group_started = 1;
}
printf("%s\n", b);
} else {
if (group_started) printf("\n");
group_started = 0;
}
}
static void walk(struct tnode *p)
{
if (p == NULL) return;
walk(p->left);
if (prev_word[0] != '\0')
check(prev_word, p->word);
strcpy(prev_word, p->word);
walk(p->right);
}
int main(int argc, char *argv[])
{
struct tnode *root = NULL;
char word[MAXWORD];
nprefix = (argc > 1) ? atoi(argv[1]) : 6;
if (nprefix < 1) nprefix = 1;
while (getword(word, MAXWORD) != EOF)
if (isalpha((unsigned char)word[0]) || word[0] == '_')
root = insert(root, word);
prev_word[0] = '\0';
group_started = 0;
walk(root);
if (group_started) printf("\n");
return 0;
}
Compile and Test
gcc -ansi -Wall ex6-2.c -o ex6-2
# Check the program's own source for 6-char prefix clashes
./ex6-2 < ex6-2.c
# Try with N=3 to see more groups
./ex6-2 3 < ex6-2.c
Sample Output (./ex6-2 3 < ex6-2.c)
check cond getch group_started insert prev_word walk word
getch/group_started share the 3-char prefix gro; walk/word share wal/wor — wait, those do not share 3 chars... the program correctly prints only true matches.
Design Notes
- Why a binary tree? The tree deduplicates and sorts in one pass. An in-order traversal then yields all identifiers alphabetically — making prefix comparison trivial: only adjacent words in sorted order can share a prefix.
- Why compare only adjacent words in the walk? If words
abc_1,abc_2,abc_3all match on the first 3 chars, the in-order walk visits them consecutively. Comparing each word to its predecessor catches all group members without a nested loop. - The
group_startedflag lets us print the first member on demand: only when a second member arrives do we know we have a group, so we print the predecessor then.
What This Exercise Teaches
- Binary tree as sorted set — inserting into a BST and doing an in-order traversal is equivalent to sorting; duplicate suppression comes for free
- Parameterising a threshold —
nprefixis read fromargv[1]and stored as a global; the comparison function itself (strncmp(a, b, nprefix)) doesn't need to change - Streaming group detection — comparing each element to only its predecessor (rather than all prior elements) works because sorted order guarantees all prefix-matching words are adjacent
Set Up Your C Environment
← Exercise 6-1 |
Chapter 6 Solutions |
Exercise 6-3 →
Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie