Re: [PATCH] send-email: extract email-parsing code into a subroutine

Ævar Arnfjörð Bjarmason Sun, 03 Dec 2017 14:01:05 -0800

On Sat, Dec 02 2017, Payre Nathan jotted:

> From: Nathan Payre <[email protected]>
>
> The existing code mixes parsing of email header with regular
> expression and actual code. Extract the parsing code into a new
> subroutine 'parse_header_line()'. This improves the code readability
> and make parse_header_line reusable in other place.
>
> Signed-off-by: Nathan Payre <[email protected]>
> Signed-off-by: Matthieu Moy <[email protected]>
> Signed-off-by: Timothee Albertin <[email protected]>
> Signed-off-by: Daniel Bensoussan <[email protected]>
> ---
>
> This patch is a first step to implement a new feature.
> See new feature discussion here: 
> https://public-inbox.org/git/[email protected]/
>
>  git-send-email.perl | 106 
> +++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 80 insertions(+), 26 deletions(-)
>
> diff --git a/git-send-email.perl b/git-send-email.perl
> index 2208dcc21..98c2e461c 100755
> --- a/git-send-email.perl
> +++ b/git-send-email.perl
> @@ -715,41 +715,64 @@ EOT3
>       if (!defined $compose_encoding) {
>               $compose_encoding = "UTF-8";
>       }
> -     while(<$c>) {
> +
> +     my %parsed_email;
> +     while (<$c>) {
>               next if m/^GIT:/;
> -             if ($in_body) {
> -                     $summary_empty = 0 unless (/^\n$/);
> -             } elsif (/^\n$/) {
> -                     $in_body = 1;
> -                     if ($need_8bit_cte) {
> +             parse_header_line($_, \%parsed_email);
> +             if (/^\n$/i) {
> +                     while (my $row = <$c>) {
> +                             if (!($row =~ m/^GIT:/)) {
> +                                     $parsed_email{'body'} = 
> $parsed_email{'body'} . $row;
> +                             }
> +                     }
> +             }
> +     }
> +     if ($parsed_email{'from'}) {
> +             $sender = $parsed_email{'from'};
> +     }
> +     if ($parsed_email{'in_reply_to'}) {
> +             $initial_reply_to = $parsed_email{'in_reply_to'};
> +     }
> +     if ($parsed_email{'subject'}) {
> +             $initial_subject = $parsed_email{'subject'};
> +             print $c2 "Subject: " .
> +                     quote_subject($parsed_email{'subject'}, 
> $compose_encoding) .
> +                     "\n";
> +     }
> +     if ($parsed_email{'mime-version'}) {
> +             $need_8bit_cte = 0;
> +     }
> +     if ($need_8bit_cte) {
> +             if ($parsed_email{'content-type'}) {
> +                             print $c2 "MIME-Version: 1.0\n",
> +                                      "Content-Type: 
> $parsed_email{'content-type'};",
> +                                      "Content-Transfer-Encoding: 8bit\n";
> +                     } else {
>                               print $c2 "MIME-Version: 1.0\n",
>                                        "Content-Type: text/plain; ",
> -                                        "charset=$compose_encoding\n",
> +                                      "charset=$compose_encoding\n",
>                                        "Content-Transfer-Encoding: 8bit\n";
>                       }
> -             } elsif (/^MIME-Version:/i) {
> -                     $need_8bit_cte = 0;
> -             } elsif (/^Subject:\s*(.+)\s*$/i) {
> -                     $initial_subject = $1;
> -                     my $subject = $initial_subject;
> -                     $_ = "Subject: " .
> -                             quote_subject($subject, $compose_encoding) .
> -                             "\n";
> -             } elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
> -                     $initial_reply_to = $1;
> -                     next;
> -             } elsif (/^From:\s*(.+)\s*$/i) {
> -                     $sender = $1;
> -                     next;
> -             } elsif (/^(?:To|Cc|Bcc):/i) {
> -                     print __("To/Cc/Bcc fields are not interpreted yet, 
> they have been ignored\n");
> -                     next;
> -             }
> -             print $c2 $_;
>       }
> +     if ($parsed_email{'body'}) {
> +             $summary_empty = 0;
> +             print $c2 "\n$parsed_email{'body'}\n";
> +     }
> +
>       close $c;
>       close $c2;
>
> +     open $c2, "<", $compose_filename . ".final"
> +             or die sprintf(__("Failed to open %s.final: %s"), 
> $compose_filename, $!);
> +
> +     print "affichage : \n";
> +     while (<$c2>) {
> +             print $_;
> +     }
> +
> +     close $c2;
> +
>       if ($summary_empty) {
>               print __("Summary email is empty, skipping it\n");
>               $compose = -1;
> @@ -792,6 +815,37 @@ sub ask {
>       return;
>  }
>
> +sub parse_header_line {
> +     my $lines = shift;
> +     my $parsed_line = shift;
> +
> +     foreach (split(/\n/, $lines)) {
> +             if (/^From:\s*(.+)$/i) {
> +                     $parsed_line->{'from'} = $1;
> +             } elsif (/^To:\s*(.+)$/i) {
> +                     $parsed_line->{'to'} = [ parse_address_line($1) ];
> +             } elsif (/^Cc:\s*(.+)$/i) {
> +                     $parsed_line->{'cc'} = [ parse_address_line($1) ];
> +             } elsif (/^Bcc:\s*(.+)$/i) {
> +                     $parsed_line->{'bcc'} = [ parse_address_line($1) ];
> +             } elsif (/^Subject:\s*(.+)\s*$/i) {
> +                     $parsed_line->{'subject'} = $1;
> +             } elsif (/^Date: (.*)/i) {
> +                     $parsed_line->{'date'} = $1;
> +             } elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
> +                     $parsed_line->{'in_reply_to'} = $1;
> +             } elsif (/^Message-ID: (.*)$/i) {
> +                     $parsed_line->{'message_id'} = $1;
> +             } elsif (/^MIME-Version:$/i) {
> +                     $parsed_line->{'mime-version'} = $1;
> +             } elsif (/^Content-Type:\s+(.*)\s*$/i) {
> +                     $parsed_line->{'content-type'} = $1;
> +             } elsif (/^References:\s+(.*)/i) {
> +                     $parsed_line->{'references'} = $1;
> +             }
> +     }
> +}
> +
>  my %broken_encoding;
>
>  sub file_declares_8bit_cte {


I haven't read the patches that follow. Completely untested, But just a
diff on top I came up with while reading this:

Rationale:

 * Once you start passing $_ to functions you should probably just give
   it a name.

 * !($x =~ m//) you can just write as $x !~ m//

 * There's a lot of copy/paste programming in parse_header_line() and an
   inconsistency between you seeing A-Header and turning it into either
   a_header or a-header. If you just stick with a-header and use dash
   you end up with just two cases.

   The resulting line is quite long, so it's worth doing:

   my $header_parsed   = join "|", qw(To CC ...);
   my $header_unparsed = join "|", qw(From Subject Message-ID ...);
   [...]
   if ($str =~ /^($header_unparsed)
   

diff --git a/git-send-email.perl b/git-send-email.perl
index 98c2e461cf..3696cad456 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -717,12 +717,12 @@ EOT3
        }

        my %parsed_email;
-       while (<$c>) {
-               next if m/^GIT:/;
-               parse_header_line($_, \%parsed_email);
-               if (/^\n$/i) {
+       while (my $line = <$c>) {
+               next if $line =~ m/^GIT:/;
+               parse_header_line($line, \%parsed_email);
+               if ($line =~ /^\n$/i) {
                        while (my $row = <$c>) {
-                               if (!($row =~ m/^GIT:/)) {
+                               if ($row !~ m/^GIT:/) {
                                        $parsed_email{'body'} = 
$parsed_email{'body'} . $row;
                                }
                        }
@@ -731,7 +731,7 @@ EOT3
        if ($parsed_email{'from'}) {
                $sender = $parsed_email{'from'};
        }
-       if ($parsed_email{'in_reply_to'}) {
+       if ($parsed_email{'in-reply-to'}) {
                $initial_reply_to = $parsed_email{'in_reply_to'};
        }
        if ($parsed_email{'subject'}) {
@@ -820,28 +820,10 @@ sub parse_header_line {
        my $parsed_line = shift;

        foreach (split(/\n/, $lines)) {
-               if (/^From:\s*(.+)$/i) {
-                       $parsed_line->{'from'} = $1;
-               } elsif (/^To:\s*(.+)$/i) {
-                       $parsed_line->{'to'} = [ parse_address_line($1) ];
-               } elsif (/^Cc:\s*(.+)$/i) {
-                       $parsed_line->{'cc'} = [ parse_address_line($1) ];
-               } elsif (/^Bcc:\s*(.+)$/i) {
-                       $parsed_line->{'bcc'} = [ parse_address_line($1) ];
-               } elsif (/^Subject:\s*(.+)\s*$/i) {
-                       $parsed_line->{'subject'} = $1;
-               } elsif (/^Date: (.*)/i) {
-                       $parsed_line->{'date'} = $1;
-               } elsif (/^In-Reply-To:\s*(.+)\s*$/i) {
-                       $parsed_line->{'in_reply_to'} = $1;
-               } elsif (/^Message-ID: (.*)$/i) {
-                       $parsed_line->{'message_id'} = $1;
-               } elsif (/^MIME-Version:$/i) {
-                       $parsed_line->{'mime-version'} = $1;
-               } elsif (/^Content-Type:\s+(.*)\s*$/i) {
-                       $parsed_line->{'content-type'} = $1;
-               } elsif (/^References:\s+(.*)/i) {
-                       $parsed_line->{'references'} = $1;
+               if (/^(To|Cc|Bcc):\s*(.+)$/i) {
+                       $parsed_line->{lc $1} = [ parse_address_line($2) ];
+               } elsif 
(/^(From|Subject|Date|In-Reply-To|Message-ID|MIME-Version|Content-Type|References):\s*(.+)\s*$/i)
 {
+                       $parsed_line->{lc $1} = $2;
                }
        }
 }

Re: [PATCH] send-email: extract email-parsing code into a subroutine

Reply via email to