Exercise 6-3. Write a cross-referencer that prints a list of all words in a document, and for each word, a list of the line numbers on which it occurs. Remove noise words like “the”, “and”, and so on from the list.
The data structure is a binary tree where each node holds the word and a linked list of line numbers. Duplicates within a line are suppressed (we store a line number only once per occurrence). A small table of noise words is checked before insertion.
Solution
/* K&R Exercise 6-3 — cross-reference: word -> list of line numbers
* Compile: gcc -ansi -Wall ex6-3.c -o ex6-3
* Usage: ./ex6-3 < document.txt */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define MAXWORD 100
/* Linked list of line numbers */
struct lnode {
int lineno;
struct lnode *next;
};
/* Binary tree node */
struct tnode {
char *word;
struct lnode *lines;
struct tnode *left, *right;
};
/* Noise words to suppress */
static const char *noise[] = {
"a", "an", "the", "and", "or", "but", "in", "on", "at",
"to", "of", "is", "it", "be", "as", "by", "we", "he",
"she", "they", "this", "that", "for", "are", "was", "were",
"with", "not", "from", "have", "has", "had", NULL
};
static int is_noise(const char *w)
{
const char **p;
for (p = noise; *p; p++)
if (strcmp(w, *p) == 0) return 1;
return 0;
}
static char *my_strdup(const char *s)
{
char *p = (char *)malloc(strlen(s) + 1);
if (p) strcpy(p, s);
return p;
}
/* Append lineno to the linked list, suppressing duplicates */
static struct lnode *addline(struct lnode *head, int lineno)
{
struct lnode *p, *prev = NULL, *cur = head;
/* find insertion point (keep list sorted, no dup) */
while (cur && cur->lineno < lineno) {
prev = cur;
cur = cur->next;
}
if (cur && cur->lineno == lineno)
return head; /* already recorded on this line */
p = (struct lnode *)malloc(sizeof(struct lnode));
p->lineno = lineno;
p->next = cur;
if (prev) prev->next = p;
else head = p;
return head;
}
static struct tnode *addword(struct tnode *p, const char *w, int lineno)
{
int cond;
if (p == NULL) {
p = (struct tnode *)malloc(sizeof(struct tnode));
p->word = my_strdup(w);
p->lines = NULL;
p->left = p->right = NULL;
} else if ((cond = strcmp(w, p->word)) < 0) {
p->left = addword(p->left, w, lineno);
return p;
} else if (cond > 0) {
p->right = addword(p->right, w, lineno);
return p;
}
/* cond == 0: word already in tree — just append the line number */
p->lines = addline(p->lines, lineno);
return p;
}
static void print_lines(struct lnode *p)
{
int col = 0;
for ( ; p; p = p->next) {
if (col > 0 && col % 10 == 0) { printf("\n%*s", 20, ""); }
printf(" %4d", p->lineno);
col++;
}
printf("\n");
}
static void treeprint(struct tnode *p)
{
if (p) {
treeprint(p->left);
printf("%-20s", p->word);
print_lines(p->lines);
treeprint(p->right);
}
}
int main(void)
{
struct tnode *root = NULL;
char word[MAXWORD];
int c, lineno = 1;
/* Simple word reader — treats any non-alnum as separator */
for (;;) {
/* skip non-alpha */
while ((c = getchar()) != EOF && !isalpha(c)) {
if (c == '\n') lineno++;
}
if (c == EOF) break;
/* read word */
char *w = word;
int lim = MAXWORD - 1;
*w++ = (char)tolower(c);
while (lim-- > 0 && isalpha(c = getchar()))
*w++ = (char)tolower(c);
*w = '\0';
if (!is_noise(word))
root = addword(root, word, lineno);
if (c == '\n') lineno++;
}
treeprint(root);
return 0;
}
Compile and Test
gcc -ansi -Wall ex6-3.c -o ex6-3
printf "the quick brown fox\njumps over the lazy dog\nthe fox ran away\n" | ./ex6-3
Sample Output
away 3 brown 1 dog 2 fox 1 3 jumps 2 lazy 2 over 2 quick 1 ran 3
Noise words (the, over is not noise here — “over” is not in the list) are suppressed; fox appears on lines 1 and 3.
Design Notes
- Sorted linked list for line numbers —
addlineinserts in order and suppresses duplicates; if a word appears twice on the same line it is recorded only once - Case-folding on input — words are lowercased as they are read, so
Foxandfoxcount as the same word - Column wrap at 10 —
print_lineswraps at 10 line numbers per row, aligning subsequent rows under the first, for readability with dense output
What This Exercise Teaches
- Struct with embedded list — each tree node carries a secondary linked structure; this nested combination of BST + linked list is a common real-world pattern (e.g., hash tables with chaining)
- Sorted insert with duplicate suppression —
addlinewalks the list to find the insertion point and checks for equality before allocating, keeping the list compact - Noise-word filtering — a NULL-terminated array of strings is a simple, fast-enough table for small sets; for larger sets a hash table (as in Exercise 6-5) would be appropriate
Set Up Your C Environment
← Exercise 6-2 |
Chapter 6 Solutions |
Exercise 6-4 →
Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie
2 comments on “K&R C Programs Exercise 6-3”
When I ran the program i kept on providing input on a blank screen, nothing happened cAN YOU HELP ME OUT
Hello,
Check the program again. I noticed that when I copy the code from web-page to my editor, it's causing some character encoding issues.
Also try copying from GitHub https://github.com/snadahalli/cprograms/blob/master/knrc/knrc6_3.c
Let me know if you still face this issue.