K&R C Programs Exercise 5-17

Exercise 5-17. Add a field-handling capability, so sorting may be done on fields within lines, each field sorted according to an independent set of options. (The index for this book was sorted with -df for the index category and -n for the page numbers.)

Fields are whitespace-delimited columns. The syntax -k<field>[flags] selects a 1-based field number and optional per-field flags (n, r, f, d). Multiple -k options create a priority list: if field 1 compares equal, fall through to field 2, and so on. This is the same model as POSIX sort -k.

Solution

/* K&R Exercise 5-17 — sort with field handling (-k<field>[flags])
 * Compile: gcc -ansi -Wall ex5-17.c -o mysort
 * Usage:   ./mysort -k2n -k1df    (sort by field 2 numeric, then field 1 dir+fold) */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAXLINES  5000
#define MAXSTORE  100000
#define MAXKEYS   10

static char  store[MAXSTORE];
static char *lineptr[MAXLINES];

typedef struct { int field; int num; int rev; int fold; int dir; } Key;
static Key   keys[MAXKEYS];
static int   nkeys = 0;

int my_getline(char *s, int lim)
{
    char *p = s; int c;
    while (--lim > 0 && (c = getchar()) != EOF && c != '\n') *p++ = c;
    if (c == '\n') *p++ = c;
    *p = '\0';
    return (int)(p - s);
}

int readlines(void)
{
    int len, n = 0; char *sp = store, line[1024];
    while ((len = my_getline(line, sizeof line)) > 0 && n < MAXLINES
           && sp + len < store + MAXSTORE) {
        line[len-1] = '\0'; strcpy(sp, line);
        lineptr[n++] = sp; sp += len;
    }
    return n;
}

void writelines(char *lp[], int n) { while (n-- > 0) printf("%s\n", *lp++); }

/* Get pointer to field f (1-based) in line s */
static const char *getfield(const char *s, int f)
{
    while (*s && --f > 0) {
        while (*s && !isspace((unsigned char)*s)) s++;
        while (*s &&  isspace((unsigned char)*s)) s++;
    }
    return s;
}

static int cmpfield(const char *a, const char *b, Key *k)
{
    const char *fa = getfield(a, k->field);
    const char *fb = getfield(b, k->field);
    int res;
    if (k->num) {
        double va = atof(fa), vb = atof(fb);
        res = (va < vb) ? -1 : (va > vb) ? 1 : 0;
    } else if (k->dir) {
        const char *pa = fa, *pb = fb;
        int ca, cb;
        do {
            while (*pa && !isalnum((unsigned char)*pa) && *pa != ' ') pa++;
            while (*pb && !isalnum((unsigned char)*pb) && *pb != ' ') pb++;
            ca = k->fold ? tolower((unsigned char)*pa) : (unsigned char)*pa;
            cb = k->fold ? tolower((unsigned char)*pb) : (unsigned char)*pb;
            if (ca == cb) { if (*pa) pa++, pb++; }
            else break;
        } while (*pa && *pb);
        res = ca - cb;
    } else if (k->fold) {
        const char *p = fa, *q = fb;
        int c1, c2;
        while ((c1 = tolower((unsigned char)*p)) == (c2 = tolower((unsigned char)*q)) && *p)
            p++, q++;
        res = c1 - c2;
    } else {
        res = strcmp(fa, fb);
    }
    return k->rev ? -res : res;
}

int compare(const void *a, const void *b)
{
    int i, res = 0;
    const char *s1 = *(const char **)a, *s2 = *(const char **)b;
    for (i = 0; i < nkeys && res == 0; i++)
        res = cmpfield(s1, s2, &keys[i]);
    return res;
}

int main(int argc, char *argv[])
{
    int i, n;
    for (i = 1; i < argc; i++) {
        if (argv[i][0] == '-' && argv[i][1] == 'k' && nkeys < MAXKEYS) {
            Key k = {0,0,0,0,0};
            char *p = argv[i] + 2;
            k.field = atoi(p);
            if (k.field < 1) k.field = 1;
            while (*p && (*p == '-' || isdigit((unsigned char)*p))) p++;
            for (; *p; p++) {
                if (*p == 'n') k.num  = 1;
                if (*p == 'r') k.rev  = 1;
                if (*p == 'f') k.fold = 1;
                if (*p == 'd') k.dir  = 1;
            }
            keys[nkeys++] = k;
        }
    }
    if (nkeys == 0) { /* default: sort whole line lexically */
        keys[0].field = 1; nkeys = 1;
    }
    if ((n = readlines()) > 0) {
        qsort(lineptr, n, sizeof(char *), compare);
        writelines(lineptr, n);
    }
    return 0;
}

Test It

# Sort by 2nd field (numeric), then 1st field (alpha)
printf "banana 2\napple 1\ncherry 2\n" | ./mysort -k2n -k1
# Sort by 1st field directory+fold order
printf "the cat\nThe Cat\nTHE!cat\n" | ./mysort -k1df

What This Exercise Teaches

  • Multi-key sort — sort by primary key, break ties with secondary key; the compare loop falls through keys until a non-zero result is found
  • Field extractiongetfield walks past f-1 whitespace-delimited tokens; the returned pointer is used directly by the comparator without copying
  • Per-key option structs — encapsulating flags in a Key struct rather than globals makes multi-key configuration clean and extensible

Set Up Your C Environment

← Exercise 5-16  | 
Chapter 5 Solutions  | 
Exercise 5-18 →

Book: The C Programming Language, 2nd Ed — Kernighan & Ritchie

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>