It occurred to me -- and I know it will sound like a heresy -- that
maybe providing an overly long example in C is not the best option here.
Why not page_owner.py with the following content instead (not tested):


#!/usr/bin/python
"""Count identical blank-line-separated blocks read from standard input.

A block is the run of lines between two empty lines.  Every distinct
block is printed together with the number of times it occurred, least
frequent first.
"""
import collections
import sys

# Maps the text of a block to the number of times it has been seen.
counts = collections.defaultdict(int)

# Accumulate the current block line by line instead of reading the whole
# input into memory at once.
txt = ''
for line in sys.stdin:
    if line == '\n':
        # An empty line terminates the current block.
        counts[txt] += 1
        txt = ''
    else:
        txt += line
# Account for a final block that is not followed by an empty line.
counts[txt] += 1

# sorted() takes its ordering function through key=; order by count.
for txt, num in sorted(counts.items(), key=lambda x: x[1]):
    # Skip empty blocks, e.g. from consecutive blank lines or a blank
    # line at the very end of the input.
    if len(txt) > 1:
        # Both values must be passed to % as one tuple.  A parenthesised
        # single-argument print works on Python 2 and Python 3 alike.
        print('%d times:\n%s' % (num, txt))


And it's so “long” only because I chose not to read the whole file at
once, as in:

    
# Alternative: read all of stdin at once, split it into blocks on empty
# lines ('\n\n') and count how many times each block occurs.
counts = collections.defaultdict(int)
for txt in sys.stdin.read().split('\n\n'):
    counts[txt] += 1


On Fri, Jan 11 2013, Minchan Kim wrote:
> read_block reads one character at a time until it meets two
> consecutive newlines.  That is bad for performance, and the current
> code is not in good shape for readability either.
>
> This patch improves the speed and cleans the code up.
>
> Cc: Mel Gorman <mgor...@suse.de>
> Cc: Andy Whitcroft <a...@shadowen.org>
> Cc: Alexander Nyberg <al...@dsv.su.se>
> Cc: Randy Dunlap <rdun...@infradead.org>
> Signed-off-by: Michal Nazarewicz <min...@mina86.com>
> Signed-off-by: Minchan Kim <minc...@kernel.org>
> ---
>  Documentation/page_owner.c |   34 +++++++++++++---------------------
>  1 file changed, 13 insertions(+), 21 deletions(-)
>
> diff --git a/Documentation/page_owner.c b/Documentation/page_owner.c
> index 43dde96..96bf481 100644
> --- a/Documentation/page_owner.c
> +++ b/Documentation/page_owner.c
> @@ -28,26 +28,17 @@ static int max_size;
>  
>  struct block_list *block_head;
>  
> -int read_block(char *buf, FILE *fin)
> +int read_block(char *buf, int buf_size, FILE *fin)
>  {
> -     int ret = 0;
> -     int hit = 0;
> -     int val;
> -     char *curr = buf;
> -
> -     for (;;) {
> -             val = getc(fin);
> -             if (val == EOF) return -1;
> -             *curr = val;
> -             ret++;
> -             if (*curr == '\n' && hit == 1)
> -                     return ret - 1;
> -             else if (*curr == '\n')
> -                     hit = 1;
> -             else
> -                     hit = 0;
> -             curr++;
> +     char *curr = buf, *const buf_end = buf + buf_size;
> +
> +     while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) {
> +             if (*curr == '\n') /* empty line */
> +                     return curr - buf;
> +             curr += strlen(curr);
>       }
> +
> +     return -1; /* EOF or no space left in buf. */
>  }
>  
>  static int compare_txt(struct block_list *l1, struct block_list *l2)
> @@ -84,10 +75,12 @@ static void add_list(char *buf, int len)
>       }
>  }
>  
> +#define BUF_SIZE     1024
> +
>  int main(int argc, char **argv)
>  {
>       FILE *fin, *fout;
> -     char buf[1024];
> +     char buf[BUF_SIZE];
>       int ret, i, count;
>       struct block_list *list2;
>       struct stat st;
> @@ -106,11 +99,10 @@ int main(int argc, char **argv)
>       list = malloc(max_size * sizeof(*list));
>  
>       for(;;) {
> -             ret = read_block(buf, fin);
> +             ret = read_block(buf, BUF_SIZE, fin);
>               if (ret < 0)
>                       break;
>  
> -             buf[ret] = '\0';
>               add_list(buf, ret);
>       }
>  
> -- 
> 1.7.9.5
>

-- 
Best regards,                                         _     _
.o. | Liege of Serenely Enlightened Majesty of      o' \,=./ `o
..o | Computer Science,  Michał “mina86” Nazarewicz    (o o)
ooo +----<email/xmpp: m...@google.com>--------------ooO--(_)--Ooo--

Attachment: pgprD7KpwW2uI.pgp
Description: PGP signature

Reply via email to