Am 21.07.2015 um 22:35 schrieb Jim Gibson:
 > That is an unique data structure and transformation requirement (as
most are), so you are not likely to find an existing set of code to do
exactly what you want. However, the transformation should not be too
difficult, so you should try to code up something yourself and ask for
help if you get stuck or would like to optimize your solution.

Ok, maybe I should give more background information.
The aim of this project is to convert the ouput of djvused's
print-outline command to YAML.
The output is like this:

$ djvused file.djvu -e print-outline
(bookmarks
 ("title 1"
  "#1"
  ("title 1.1"
   "#2" )
  ("title 1.2"
   "#3"
   ("title 1.2.1"
    "#4" ) ) )
 ("title 2"
  "#5" ) )

the nesting of outline items - 3 levels in this example - can become
arbitrarily large.
Performance is no issue, so I still use hashrefs instead of arrays.

This is what if have done so far and it now seems to work.
Any comments?

(Any option parsing, error checks, binmodes omitted. The full code is at
github.com/simon0x5b/bookmark-djvu/blob/yaml/bookmark-djvu-extract)

#!/usr/bin/env perl


use 5.010;
use warnings;
use strict;
use YAML::XS;


# start extracting bookmarks with djvused in a pipe
open my $djvused_fd, "-|", ("djvused", "file.djvu", "-u", "-e",
                            "print-outline");



my $outline_items = parse_djvused_output ($djvused_fd);
die "no outline found" unless @$outline_items;

dump_to_yaml ($outline_items);



# arg: open file descriptor to djvused
# returns: arrayref of hashrefs, each describing level, title and page of an
# outline item.
sub parse_djvused_output {
    my $djvused_fd = shift;
    my $level = 0;
    my $line = <$djvused_fd>;

    if (!$line or $line !~ /^\(bookmarks$/) {
        warn "no bookmarks found\n";
        return 1;
    }
    my $outline_items = [];
    while ($line = <$djvused_fd>) {
        chomp $line;
        $line =~ /^\s*\("(?<title>.+)"$/
            or die "line '$line' in djvused output does not match.";

        
        my $title = djvused_unescape ($+{title});
        
        $line = <$djvused_fd>;

        $line =~ /\s*"#(?<page>[0-9]+)"(?<close>(\s*\))*)\s*$/
            or die "line '$line' in djvused output does not match.";

        push @$outline_items, {level => $level, title => $title,
                               page => $+{page}};
        $level -= ($+{close} =~ tr/\)//) - 1;
    }

    return $outline_items;
}



sub dump_to_yaml {
    my $input_array = shift;
    (my $array, undef) = get_recursive_array ($input_array, 0);
    print  Dump ($array);
}

# recursively convert flat array into nested data structure
# args: input array ref, index to start
# returns: output array ref, following index
sub get_recursive_array {
    my $input_array = shift;
    my $start = shift;
    my $index = $start;
    my $last = $#$input_array;
    my $level = $input_array->[$index]{level};
    my $result_array = [];
    while (1) {
        
        last if $index > $last;
        my $next_level = $input_array->[$index]{level};
        die "error in get_recursive_array" if $next_level > $level;
        
        last if $next_level < $level;

        my $outline_item = {title => $input_array->[$index]{title},
                            page => $input_array->[$index]{page}};


        if ($index + 1 <= $last
            && $input_array->[$index + 1]{level} > $level) {
            ($outline_item->{kids}, $index) =
                get_recursive_array ($input_array, $index + 1);
        }
        else {
            ++$index;
        }
        
        push @$result_array, $outline_item;
    }

    return ($result_array, $index);
}


sub djvused_unescape {
    # see djvused(1): "DJVUSED FILE FORMATS - Strings"
    # and perlrebackslash
    my $string = shift;

    # octal escape sequences
    $string =~ s/\\([0-7]{3})/pack 'U', oct($1)/ge;

    $string =~ s/\\(.)/
    if ($1 eq "\\" or $1 eq "\"") {
        $1;
    }
    elsif ($1 eq "a") {
        "\a";
    }
    elsif ($1 eq "b") {
        "\b";
    }
    elsif ($1 eq "t") {
        "\t";
    }
    elsif ($1 eq "n") {
        "\n";
    }
    elsif ($1 eq "v") {
        "\x{0b}";
    }
    elsif ($1 eq "f") {
        "\f";
    }
    elsif ($1 eq "r") {
        "\r";
    }
    else {
        die "unknown escape sequence in '$string'";

    }
    /ge;
    return $string
}







-- 
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/


Reply via email to