Content-Type: text/plain; charset=iso-8859-15 Content-Transfer-Encoding: quoted-printable
Hello,
This is my last post to the Debian Bug Tracking System for this bug, which
now seems more like a wishlist item, so I am Cc'ing the original author.
This patch against the original Packages.pm corrects my
$parsed{Files} =3D { $filename =3D>....} bug; it must be
$parsed{Files}{$filename} =3D
I also added an ignore_fileds method to tell the parser to skip
certain fields.
Regards.
=2D-=20
Daniel 'NebuchadnezzaR' Dehennin
R=E9cup=E9rer ma clef GPG:
gpg --keyserver pgp.mit.edu --recv-keys 0x2A408F69
Content-Disposition: attachment; filename=Packages.pm.diff
Content-Transfer-Encoding: quoted-printable
Content-Description: Packages.pm.diff
=2D-- Packages.pm.old 2005-08-26 02:58:55.000000000 +0200
+++ Packages.pm 2005-08-26 15:29:33.000000000 +0200
@@ -1,25 +1,67 @@
use strict;
package Parse::Debian::Packages;
=2Dour $VERSION =3D '0.01';
+our $VERSION =3D "0.02";
+
+use Compress::Zlib;
+use Compress::Bzip2;
+use File::MMagic;
+use FileHandle;
=20
sub new {
my $class =3D shift;
=2D my $fh =3D shift;
+ my $file =3D shift;
+ my $fh;
=20
=2D return bless { fh =3D> $fh }, $class;
+ if (! ref $file) {
+ # Caller give us a filename
+ return undef unless -f $file;
+
+ # Default magic is ok for application/x-gzip application/x-bzip2 and
text=
/plain
+ my $magic =3D File::MMagic->new();
+ my $type =3D $magic->checktype_filename($file);
+
+ SWITCH: for ($type) {
+ /text\/plain/ && do {
+ $fh =3D new FileHandle;
+ $fh->open("< $file") or return undef;
+ last;
+ };
+=09=20=20
+ /application\/x-gzip/ && do {
+ $fh =3D gzopen ($file, "rb") or return undef;
+ last;
+ };
+=09=20=20
+ /application\/x-bzip2/ && do {
+ $fh =3D bzopen ($file, "rb") or return undef;
+ last;
+ };
+ # It's not a supported file format
+ return undef;
+ }
+ return bless { FH =3D> $fh, TYPE =3D> $type, FIELDS_IGNORED =3D> {}},=
$class;
+ } else {
+ return bless { FH =3D> $file, TYPE =3D> "IOFile", FIELDS_IGNORED =3D>=
{}}, $class;
+ }
}
=20
sub next {
my $self =3D shift;
=2D my $fh =3D $self->{fh};
=20
my %parsed;
=2D while (<$fh>) {
+ while ($_ =3D $self->__readline) {
last if /^$/;
=2D if (my ($key, $value) =3D m/^(.*): (.*)/) {
=2D $parsed{$key} =3D $value;
=2D }
=2D else {
+
+ if (my ($key, $value) =3D m/^([^\s:]*):\s?(.*)/) {
+ # Do not add an empty Files key when parsing Sources
+ $parsed{$key} =3D $value unless $key eq "Files"
+ or exists $self->{FIELDS_IGNORED}->{$key};
+
+ } elsif (!exists $self->{FIELDS_IGNORED}->{Files}
+ and my ($md5, $size, $filename) =3D
/^\s(\w{32})\s(\d+)\s(.*)/) {
+ $parsed{Files}{$filename} =3D { size =3D> $size, MD5sum =3D> $md5 };
+ } elsif (! /^\s(\w{32})\s(\d+)\s(.*)/
+ and !exists $self->{FIELDS_IGNORED}->{body}) { # Do not include
Sources =
Files as body
s/ //;
s/^\.$//;
$parsed{body} .=3D $_;
@@ -29,7 +71,47 @@
return %parsed;
}
=20
=2D1;
+sub ignore_fileds {
+ my $self =3D shift;
+ if (@_) {
+ return map { $self->{FIELDS_IGNORED}->{$_} =3D 1
+ unless exists $self->{FIELDS_IGNORED}->{$_} } @_;
+ } else {
+ return sort keys %{$self->{FIELDS_IGNORED}};
+ }
+}
+
+sub __readline {
+ my $self =3D shift;
+ my $line =3D "";
+
+ SWITCH: for ($self->{TYPE}) {
+ /text\/plain|IOFile/ && do {
+ $line =3D $self->{FH}->getline;
+ last;
+ };
+=09=20=20
+ /application\/x-gzip/ && do {
+ my $bytesread =3D $self->{FH}->gzreadline($line);
+ if ($bytesread =3D=3D 0) {
+ $line =3D "";
+ }
+ last;
+ };
+=09=20=20
+ /application\/x-bzip2/ && do {
+ my $bytesread =3D $self->{FH}->bzreadline($line);
+ if ($bytesread =3D=3D 0) {
+ $line =3D "";
+ }
+ last;
+ };
+ die "Should Never Happend\n";
+ }
+ return $line;
+}
+
+1
=20
=20
=3Dhead1 NAME
@@ -40,24 +122,55 @@
=20
use YAML;
use IO::File;
+ use FileHandle;
use Parse::Debian::Packages;
=2D my $fh =3D IO::File->new("Packages");
=20
=2D my $parser =3D Parse::Debian::Packages->new( $fh );
=2D while (my %package =3D $parser->next) {
+ my $pkg_file =3D "Packages";
+ my $src_file =3D "Sources";
+ my $other_src_file =3D "Sources.bz2";
+
+ my $fh_io =3D IO::File->new($pkg_file);
+ my $fh_FH =3D new FileHandle;
+ $fh_FH->open("< $src_file");
+
+ my $parser_on_io =3D Parse::Debian::Packages->new( $fh_io );
+ my $parser_on_FH =3D Parse::Debian::Packages->new( $fh_FH );
+ my $parser_on_filename =3D Parse::Debian::Packages->new( $other_src_file =
);
+
+ $parser_on_io->ignore_fileds("Description", "body");
+ $parser_on_FH->ignore_fileds("Build-Depends", "Files");
+
+ my %pkg_with_io =3D $parser_on_io->next;
+ my %pkg_with_FH =3D $parser_on_FH->next;
+ my %pkg_with_filename =3D $parser_on_filename->next;
+
+ print Dump \%pkg_with_io;
+ print Dump \%pkg_with_FH;
+ print Dump \%pkg_with_filename;
+
+ while (my %package =3D $parser_on_io->next) {
print Dump \%package;
}
=20
=3Dhead1 DESCRIPTION
=20
=2DThis module parses the Packages files used by the debian package
=2Dmanagement tools.
+This module parses the Packages and Sources files used by the debian
+package management tools.
=20
It presents itself as an iterator. Each call of the ->next method
will return the next package found in the file.
=20
=2DFor laziness, we take a filehandle in to the constructor. Please open
=2Dthe file for us.
+You can pass either a FileHandle or a filename to the constructor. The
+advantage of a filename is that you can parse text/plain, gzipped or
+bzip2-compressed files.
+
+If the filename passed to the constructor doesn't represent a file in a
+supported format (text/plain, application/x-gzip, application/x-bzip2)
+or if that file cannot be opened, new() returns undef.
+
+You can ignore some fields with the ignore_fileds() method. It takes a
+list of field names found in Packages or Sources files, plus one
+special 'body' field, which corresponds to the long description.
=20
=3Dhead1 AUTHOR
=20
pgpnZg9MDf72X.pgp
Description: PGP signature

