#! /usr/bin/perl
### -------------------------------------------------------------------------
### Program: bbc_l1.pl:
### Copyright (c) 2005 Robin Gilks
###
### This program is free software; you can redistribute it and/or
### modify it under the terms of the GNU General Public License
### as published by the Free Software Foundation; either version 2
### of the License, or (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software
### Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
### -------------------------------------------------------------------------
###
### This is a MythStream parser or harvester
### The idea is to get the RealPlayer historic content from the various
### radio stations that the BBC runs. Most programs are available for streaming
### for up to 7 days after broadcast.
###    MythStream passes in the URL which is specified in streams.res
###    or the database so we get eg 
###  streams.res = radio/aod/networks/radio4/audiolist.shtml
###  get http://www.bbc.co.uk/radio/aod/networks/radio4/audiolist.shtml
###    we parse out the lines with 'bbcplayer' in them eg.
###  <li><a href="aod.shtml?radio4/archers" target="bbcplayer">The Archers</a></li>
###  <li><a href="aod.shtml?radio4/just" target="bbcplayer">Just a Minute</a></li>
###  <li><a href="aod.shtml?radio4/afternoonplay" target="bbcplayer">Afternoon Play</a></li>
###    We keep the <a> tag as the name and if there is anything after the </a>
###    then we use that as the description. We then pass back with a 
###    handler=level2 to display the list we have eg.
###  <item>
###      <name>Afternoon Play</name>
###      <url>aod.shtml?radio4/afternoonplay</url>
###      <handler>bbc_l2</handler>
###  </item>
###    When the user selects a show we parse the page returned to
###    find the 'var AudioStream = "...." url and extract it eg.
###  var AudioStream = "/radio/aod/shows/rpms/radio4/archers";
###    Append '.rpm' and get that eg.
###  get http://www.bbc.co.uk/radio/aod/shows/rpms/radio4/archers.rpm
###    The contents of the rpm file is the rtsp url we want which
###    we return!! eg.
###  <item>
###      <name>Dummy</name>
###      <url>rtsp://rmv8.bbc.net.uk/radio4/archers/archers_mon.ra</url>
###  </item>
###    This is thankfully played immediately :-))
### -------------------------------------------------------------------------

use English;
use XML::DOM;
use HTML::TokeParser;

#------------------------------------------------------------------------------
# Init
#------------------------------------------------------------------------------

&read_parse();    # get commandline parameters into @in
$source = $in[0]; # source filename from command line

my $doc = XML::DOM::Document->new;
my $head = $doc->createXMLDecl ('1.0');
my $root = $doc->createElement('items');

sub newNode
{
  local $name  = shift;
  local $value = shift;
  local $node = $doc->createElement($name);
  local $text = $doc->createTextNode($value);
  $node->appendChild($text);
  
  return $node;
}

#------------------------------------------------------------------------------
# read file into $data
#------------------------------------------------------------------------------

$datafile = $source;
open( INFO, "<$datafile" );      # Open file for reading
undef $/;
$data = <INFO>;                 # Read all
close(INFO) ;

#------------------------------------------------------------------------------
# search url's in $data and create XML
#------------------------------------------------------------------------------


my $p = HTML::TokeParser->new(\$data);
my $s_name = "";

while (my $token = $p->get_tag("a")) {
   my $url = $token->[1]{href};
   my $name = $p->get_trimmed_text("/a");
   my $target = $token->[1]{target}; 
   my $class = $token->[1]{class};
   my $description = $p->get_trimmed_text("a", "/li", "/div", "br", "/span");
   $description =~ s/-//g;	# remove -
   if ($class =~ /txday/) {
      $name = $s_name." - ".$name;
   } else {
      $s_name = $name;
   }
   
   if ($target =~ /bbcplayer/ ) {
      $item = $doc->createElement('item');
      $root->appendChild($item);
      $item->appendChild( newNode('name', $name) );
      $item->appendChild( newNode('url', $url ) );
      $item->appendChild( newNode('handler', "bbc_l2") );
      $item->appendChild( newNode('descr', $description) );
   }
}

print $head->toString;
print $root->toString;
print "\n";

#--------------------------------------------------------------------------------
# get command line parameters
#--------------------------------------------------------------------------------

sub read_parse 
{
  local (*in) = @_ if @_;
  local ($i);
  push(@in, @ARGV);
  foreach $i (0 .. $#in) { $in[$i] =~ s/\+/ /g;}
  return scalar(@in);
}