Hello!

Here is my patch to add -k support to sort and remove the -u flag from
it, because -u does not work (and even less so with my -k patch).  The
current code uses strcmp to compare adjacent lines of the sorted output
to check whether they are the same, but that already fails when "-n -u"
is given.  A correct implementation of -u should instead check whether
the sort comparison function considers the two lines equal.

The -k flag is not complete in that it does not support modifiers
specific to a single key definition.

Regards,
Jakob Kramer
From 1ac4b7f4339c78b08cdb942b310a4c653ce8d1b1 Mon Sep 17 00:00:00 2001
From: Jakob Kramer <jakob.kra...@gmx.de>
Date: Sat, 12 Apr 2014 17:53:10 +0200
Subject: [PATCH] sort: add -k, remove -u

Options  that are  specific to  a  single  key  definition are not
supported (e.g. "sort -k 2,3n -k 4,4").  Should you try to specify
such definitions, sort will  return with EXIT_FAILURE and an error
message.   Instead, all key definitions exclusively use the global
settings.

It always behaves as if -b were set.

I removed -u because it does not work the way that it was implemented
here.  It should be rewritten so that it checks if the sort function
thinks that the strings were the same.
---
 sort.1 |  20 ++++++--
 sort.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 169 insertions(+), 18 deletions(-)

diff --git a/sort.1 b/sort.1
index 7913357..80fa692 100644
--- a/sort.1
+++ b/sort.1
@@ -3,7 +3,10 @@
 sort \- sort lines
 .SH SYNOPSIS
 .B sort
-.RB [ \-nru ]
+.RB [ \-nr ]
+.RB [ \-k
+.I key
+.R ]...
 .RI [ file ...]
 .SH DESCRIPTION
 .B sort
@@ -17,5 +20,16 @@ perform a numeric sort.
 .B \-r
 reverses the sort.
 .TP
-.B \-u
-prints repeated lines only once.
+.B \-k key
+specifies a key definition of the form \fBS\fR[.\fBs\fR][,\fBE\fR[.\fBe\fR]],
+where
+.B S,
+.B s,
+.B E,
+and
+.B e
+are the starting column, starting character in that column, ending column and
+the ending character of that column respectively.  If they are not specified,
+s refers to the first character of the specified starting column, E refers to
+the last column of every line, and e refers to the last character of that last
+column.
diff --git a/sort.c b/sort.c
index 348e16b..f43464f 100644
--- a/sort.c
+++ b/sort.c
@@ -1,4 +1,5 @@
 /* See LICENSE file for copyright and license details. */
+#include <ctype.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -7,10 +8,30 @@
 #include "text.h"
 #include "util.h"
 
+struct keydef {
+	unsigned start_column;
+	unsigned end_column;
+	unsigned start_char;
+	unsigned end_char;
+};
+
+struct kdlist {
+	struct keydef keydef;
+	struct kdlist *next;
+};
+
+static struct kdlist *head = NULL;
+static struct kdlist *curr = NULL;
+
+static void addkeydef(char *);
+static void freelist(void);
 static int linecmp(const char **, const char **);
+static char *next_nonblank(char *);
+static char *next_blank(char *);
+static int parse_keydef(struct keydef *, char *);
+static char *columns(char *, const struct keydef *);
 
 static bool rflag = false;
-static bool uflag = false;
 static bool nflag = false;
 
 static struct linebuf linebuf = EMPTY_LINEBUF;
@@ -18,7 +39,7 @@ static struct linebuf linebuf = EMPTY_LINEBUF;
 static void
 usage(void)
 {
-	eprintf("usage: %s [-nru] [file...]\n", argv0);
+	enprintf(2, "usage: %s [-nr] [-k def]... [file...]\n", argv0);
 }
 
 int
@@ -34,18 +55,20 @@ main(int argc, char *argv[])
 	case 'r':
 		rflag = true;
 		break;
-	case 'u':
-		uflag = true;
+	case 'k':
+		addkeydef(EARGF(usage()));
 		break;
 	default:
 		usage();
 	} ARGEND;
 
+	addkeydef("1");
+
 	if(argc == 0) {
 		getlines(stdin, &linebuf);
 	} else for(; argc > 0; argc--, argv++) {
 		if(!(fp = fopen(argv[0], "r"))) {
-			weprintf("fopen %s:", argv[0]);
+			enprintf(2, "fopen %s:", argv[0]);
 			continue;
 		}
 		getlines(fp, &linebuf);
@@ -55,24 +78,138 @@ main(int argc, char *argv[])
 			(int (*)(const void *, const void *))linecmp);
 
 	for(i = 0; i < linebuf.nlines; i++) {
-		if(!uflag || i == 0 || strcmp(linebuf.lines[i],
-					linebuf.lines[i-1]) != 0) {
-			fputs(linebuf.lines[i], stdout);
-		}
+		fputs(linebuf.lines[i], stdout);
 	}
 
+	freelist();
 	return EXIT_SUCCESS;
 }
 
-int
+static void
+addkeydef(char *def)
+{
+	struct kdlist *node;
+
+	node = malloc(sizeof(*node));
+	if(!node)
+		enprintf(2, "malloc:");
+	if(!head)
+		head = node;
+	if(parse_keydef(&node->keydef, def))
+		enprintf(2, "parse_keydef:");
+	if(curr)
+		curr->next = node;
+	node->next = NULL;
+	curr = node;
+}
+
+static void
+freelist(void)
+{
+	struct kdlist *node;
+	struct kdlist *tmp;
+
+	for(node = head; node; node = tmp) {
+		tmp = node->next;
+		free(node);
+	}
+}
+
+static int
 linecmp(const char **a, const char **b)
 {
-	if (nflag) {
-		if (rflag)
-			return strtoul(*b, 0, 10) - strtoul(*a, 0, 10);
+	char *s1, *s2;
+	int res = 0;
+	struct kdlist *node;
+
+	for(node = head; node && res == 0; node = node->next) {
+		s1 = columns((char *)*a, &node->keydef);
+		s2 = columns((char *)*b, &node->keydef);
+
+		/* don't consider modifiers if it's the default key
+		 * definition that was implicitly added */
+		if(!(node == head) && !node->next)
+			res = strcmp(s1, s2);
+		else if(nflag)
+			res = strtoul(s1, 0, 10) - strtoul(s2, 0, 10);
 		else
-			return strtoul(*a, 0, 10) - strtoul(*b, 0, 10);
+			res = strcmp(s1, s2);
+
+		free(s1);
+		free(s2);
+	}
+	return rflag ? -res : res;
+}
+
+static int
+parse_keydef(struct keydef *kd, char *s)
+{
+	char *rest = s;
+	kd->start_column = 1;
+	kd->start_char = 1;
+	/* 0 means end of line */
+	kd->end_column = 0;
+	kd->end_char = 0;
+
+	kd->start_column = strtoul(rest, &rest, 10);
+	if(!kd->start_column)
+		enprintf(2, "starting column cannot be 0\n");
+	if(*rest == '.')
+		kd->start_char = strtoul(rest+1, &rest, 10);
+	if(*rest == ',') {
+		kd->end_column = strtoul(rest+1, &rest, 10);
+		if(kd->end_column < kd->start_column)
+			enprintf(2, ",%u is too small\n", kd->end_column);
 	}
-	return strcmp(*a, *b) * (rflag ? -1 : +1);
+	if(*rest == '.')
+		kd->end_char = strtoul(rest+1, &rest, 10);
+	if(*rest != '\0')
+		return -1;
+	return 0;
 }
 
+static char *
+next_nonblank(char *s)
+{
+	for(; *s && isblank(*s); s++);
+	return s;
+}
+
+static char *
+next_blank(char *s)
+{
+	for(; *s && !isblank(*s); s++);
+	return s;
+}
+
+static char *
+columns(char *line, const struct keydef *kd)
+{
+	char *rest;
+	char *start, *end;
+	unsigned i;
+	for(rest = line, i = 0; i < kd->start_column; i++) {
+		if(i != 0)
+			rest = next_blank(rest);
+		rest = next_nonblank(rest);
+	}
+	for(i = 1; i < kd->start_char && !isblank(*rest); i++, rest++);
+	start = rest;
+
+	if(kd->end_column) {
+		for(rest = line, i = 0; i < kd->end_column; i++) {
+			if(i != 0)
+				rest = next_blank(rest);
+			rest = next_nonblank(rest);
+		}
+		if(kd->end_char) {
+			for(i = 1; i < kd->end_char && *rest && !isblank(*rest); i++, rest++);
+		} else {
+			rest = next_blank(rest);
+		}
+		end = rest;
+	} else {
+		end = rest + strlen(rest);
+	}
+	return strndup(start, end - start);
+}
-- 
1.8.5.1

Reply via email to