CVSROOT: /web/www Module name: www Changes by: Jason Self <jxself> 11/03/26 16:04:44
Modified files: server/source/planetrss: planetrss.pl Added files: server/source/planetrss: changelog Log message: Updating planetrss.pl to version 1.1 CVSWeb URLs: http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/planetrss.pl?cvsroot=www&r1=1.1&r2=1.2 http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/changelog?cvsroot=www&rev=1.1 Patches: Index: planetrss.pl =================================================================== RCS file: /web/www/www/server/source/planetrss/planetrss.pl,v retrieving revision 1.1 retrieving revision 1.2 diff -u -b -r1.1 -r1.2 --- planetrss.pl 25 Mar 2011 15:22:49 -0000 1.1 +++ planetrss.pl 26 Mar 2011 16:04:25 -0000 1.2 @@ -1,56 +1,85 @@ -# PlanetRSS, Version 1.0 -# Copyright © 2011 Shailesh Ghadge + # PlanetRSS, Version 1.1 + # Copyright © 2011 Shailesh Ghadge -#This program is free software: you can redistribute it and/or modify -#it under the terms of the GNU General Public License as published by -#the Free Software Foundation, either version 3 of the License, or -#(at your option) any later version. -# -#This program is distributed in the hope that it will be useful, -#but WITHOUT ANY WARRANTY; without even the implied warranty of -#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -#GNU General Public License for more details. -# -#You should have received a copy of the GNU General Public License -#along with this program. If not, see <http://www.gnu.org/licenses/>. - -#Email: shail...@gnu.org #Date Created: 20 Mar 2011 -# -#Functionality: Fetch & save 'n' feeds from planet.gnu.org using RSS feed link http://planet.gnu.org/rss20.xml in html format -# Each feed is truncated to 'm' characters. - - -#-------------------------------------------- -use XML::RSS::Parser::Lite; -#Provides simple pure perl RSS parsing - -use LWP::Simple; -#Provides get(url) function - -#--------------------------------------------- -my $FeedLines = 3; # 'n' feeds -my $FeedLength = 200; # 'm' characters - - -my $PGfeeds = get("http://planet.gnu.org/rss20.xml"); -#Fetch RSS feeds as xml - -my $PGparser = new XML::RSS::Parser::Lite; -#Create new RSS parser - -open (PGhtml, '>planetfeeds.html'); + #This program is free software: you can redistribute it and/or modify + #it under the terms of the GNU General Public License as published by + #the Free Software Foundation, either version 3 of the License, or + #(at your option) any later version. + # + #This program is distributed in the hope that it will be useful, + #but WITHOUT ANY WARRANTY; without even the implied warranty of + #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + #GNU General Public License for more details. + # + #You should have received a copy of the GNU General Public License + #along with this program. If not, see <http://www.gnu.org/licenses/>. + + #Email: shail...@gnu.org #Date: 26 Mar 2011 + # + #Functionality: Compare with previously retrieved feeds(if any) and then if required, + # Fetch & save 'n' feeds from planet.gnu.org using RSS feed link http://planet.gnu.org/rss20.xml in html format + # Each feed is truncated to 'm' characters. + + + #-------------------------------------------- + use XML::RSS::Parser::Lite; + #Provides simple pure perl RSS parsing + + use LWP::Simple; + #Provides get(url) function + + #--------------------------------------------- + my $FeedLines = 3; # 'n' feeds + my $FeedLength = 200; # 'm' characters + + my $PGfeeds = get("http://planet.gnu.org/rss20.xml"); + #Fetch RSS feeds as xml + + my $PGparser = new XML::RSS::Parser::Lite; + #Create new RSS parser + + $PGparser->parse($PGfeeds); + #To Parse the supplied xml + + #-------------------Check---------------------- + my $Write2File = 1; #Default: We write to PlanetFeeds.html; + my $CompareFeeds = 1; + open (CurPGhtml, 'planetfeeds.html') || $CompareFeeds--; + if($CompareFeeds == 1) + { + my @Cur_Content = <CurPGhtml>; + my $Cur_Title = $Cur_Content[0]; + $Cur_Title=~ s/<(.*?)>//gi; $Cur_Title=~ s/<a(.*?)>//gi; $Cur_Title=~ s/<\/a>//gi; + $Cur_Title=~ s/<p>//gi; $Cur_Title=~ s/<\/p>//gi; $Cur_Title=~ s/<li>//gi; + $Cur_Title=~ s/<ul>//gi; $Cur_Title=~ s/<br \/>//gi; $Cur_Title =~ s/\s\s+/ /g; + $Cur_Title=substr($Cur_Title,0,index($Cur_Title,':')); + #Now we have Current Title + + my $New_Checker = $PGparser->get(0); + my $New_Title = $New_Checker->get('title'); + $New_Title = substr($New_Title,index($New_Title,':')+2); + #Now we have New Title -#--------------------------------------------- + if($Cur_Title eq $New_Title) + { + $Write2File=0; + } + #Decide whether to continue & write PGhtml + } + close(CurPGhtml); + #-------------------------End of Check----------------------------- -$PGparser->parse($PGfeeds); -#Parse the supplied xml + #print "content-type: text/html \n"; + # Use above if you get errors regarding headers -#print "content-type: text/html \n"; -# uncomment above if you get errors regarding headers + #---------------------To Create/Overwrite PlanetFeeds.html----------- + if($Write2File==1) + { -#Print Feeds data in the format of- "Title - Description... <a href='URL'>more</a>" -for (my $i = 0; $i < $FeedLines; $i++) -{ + open (PGhtml, '>planetfeeds.html'); + #Print Feeds data in the format of- "Title - Description... <a href='URL'>more</a>" + for (my $i = 0; $i < $FeedLines; $i++) + { my $PGfeed = $PGparser->get($i); my $PGurl = $PGfeed->get('url'); my $PGtitle = $PGfeed->get('title'); @@ -77,5 +106,9 @@ #Output print PGhtml "<p><a href='".$PGurl."'>".$PGtitle ."</a>: ".$PGdesc. "... <a href='".$PGurl."'>more</a></p>\n"; } -} -close(PGhtml); + } + close(PGhtml); + } + #------------------------------End of To Create/Overwrite PlanetFeeds.html------------------------- + + Index: changelog =================================================================== RCS file: changelog diff -N changelog --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ changelog 26 Mar 2011 16:04:25 -0000 1.1 @@ -0,0 +1,6 @@ +Version 1.1 - 26 Mar 2011 +Added Checker code, to first check previously retrieved feeds(if any) and only then proceed to write to planetfeeds.html if required. + +Version 1.0 - 20 Mar 2011 +The Perl script fetches & saves 'n' feeds from planet.gnu.org using RSS feed link http://planet.gnu.org/rss20.xml in html format. +Each feed is truncated to 'm' characters.