K&R C Programs Exercise 6-3

Exercise 6-3. Write a cross-referencer that prints a list of all words in a document, and for each word, a list of the line numbers on which it occurs. Remove noise words like “the”, “and”, and so on from the list.

The data structure is a binary tree where each node holds the word and a linked list of line numbers. Duplicates within a line are suppressed (we store a line number only once per occurrence). A small table of noise words is checked before insertion.

Solution

/* K&R Exercise 6-3 — cross-reference: word -> list of line numbers
 * Compile: gcc -ansi -Wall ex6-3.c -o ex6-3
 * Usage:   ./ex6-3 < document.txt */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define MAXWORD 100

/* Linked list of line numbers */
struct lnode {
    int lineno;
    struct lnode *next;
};

/* Binary tree node */
struct tnode {
    char         *word;
    struct lnode *lines;
    struct tnode *left, *right;
};

/* Noise words to suppress */
static const char *noise[] = {
    "a", "an", "the", "and", "or", "but", "in", "on", "at",
    "to", "of", "is", "it", "be", "as", "by", "we", "he",
    "she", "they", "this", "that", "for", "are", "was", "were",
    "with", "not", "from", "have", "has", "had", NULL
};

static int is_noise(const char *w)
{
    const char **p;
    for (p = noise; *p; p++)
        if (strcmp(w, *p) == 0) return 1;
    return 0;
}

static char *my_strdup(const char *s)
{
    char *p = (char *)malloc(strlen(s) + 1);
    if (p) strcpy(p, s);
    return p;
}

/* Append lineno to the linked list, suppressing duplicates */
static struct lnode *addline(struct lnode *head, int lineno)
{
    struct lnode *p, *prev = NULL, *cur = head;

    /* find insertion point (keep list sorted, no dup) */
    while (cur && cur->lineno < lineno) {
        prev = cur;
        cur  = cur->next;
    }
    if (cur && cur->lineno == lineno)
        return head;   /* already recorded on this line */

    p = (struct lnode *)malloc(sizeof(struct lnode));
    p->lineno = lineno;
    p->next   = cur;
    if (prev) prev->next = p;
    else      head       = p;
    return head;
}

static struct tnode *addword(struct tnode *p, const char *w, int lineno)
{
    int cond;
    if (p == NULL) {
        p = (struct tnode *)malloc(sizeof(struct tnode));
        p->word  = my_strdup(w);
        p->lines = NULL;
        p->left  = p->right = NULL;
    } else if ((cond = strcmp(w, p->word)) < 0) {
        p->left  = addword(p->left,  w, lineno);
        return p;
    } else if (cond > 0) {
        p->right = addword(p->right, w, lineno);
        return p;
    }
    /* cond == 0: word already in tree — just append the line number */
    p->lines = addline(p->lines, lineno);
    return p;
}

static void print_lines(struct lnode *p)
{
    int col = 0;
    for ( ; p; p = p->next) {
        if (col > 0 && col % 10 == 0) { printf("\n%*s", 20, ""); }
        printf(" %4d", p->lineno);
        col++;
    }
    printf("\n");
}

static void treeprint(struct tnode *p)
{
    if (p) {
        treeprint(p->left);
        printf("%-20s", p->word);
        print_lines(p->lines);
        treeprint(p->right);
    }
}

int main(void)
{
    struct tnode *root = NULL;
    char word[MAXWORD];
    int  c, lineno = 1;

    /* Simple word reader — treats any non-alnum as separator */
    for (;;) {
        /* skip non-alpha */
        while ((c = getchar()) != EOF && !isalpha(c)) {
            if (c == '\n') lineno++;
        }
        if (c == EOF) break;

        /* read word */
        char *w = word;
        int lim = MAXWORD - 1;
        *w++ = (char)tolower(c);
        while (lim-- > 0 && isalpha(c = getchar()))
            *w++ = (char)tolower(c);
        *w = '\0';
        if (!is_noise(word))
            root = addword(root, word, lineno);
        if (c == '\n') lineno++;
    }
    treeprint(root);
    return 0;
}

Compile and Test

gcc -ansi -Wall ex6-3.c -o ex6-3

printf "the quick brown fox\njumps over the lazy dog\nthe fox ran away\n" | ./ex6-3

Sample Output

away                    3
brown                   1
dog                     2
fox                     1    3
jumps                   2
lazy                    2
over                    2
quick                   1
ran                     3

Noise words (the, over is not noise here — “over” is not in the list) are suppressed; fox appears on lines 1 and 3.

Design Notes

  • Sorted linked list for line numbersaddline inserts in order and suppresses duplicates; if a word appears twice on the same line it is recorded only once
  • Case-folding on input — words are lowercased as they are read, so Fox and fox count as the same word
  • Column wrap at 10print_lines wraps at 10 line numbers per row, aligning subsequent rows under the first, for readability with dense output

What This Exercise Teaches

  • Struct with embedded list — each tree node carries a secondary linked structure; this nested combination of BST + linked list is a common real-world pattern (e.g., hash tables with chaining)
  • Sorted insert with duplicate suppressionaddline walks the list to find the insertion point and checks for equality before allocating, keeping the list compact
  • Noise-word filtering — a NULL-terminated array of strings is a simple, fast-enough table for small sets; for larger sets a hash table (as in Exercise 6-5) would be appropriate

Set Up Your C Environment

← Exercise 6-2  | 
Chapter 6 Solutions  | 
Exercise 6-4 →

Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie

2 comments on “K&R C Programs Exercise 6-3

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>