クラウドサーバを GMO クラウドから Amazon Web Services の EC2 に移行するに当たって、今までの RSS クローラを修正。
正直ものすごく修正した。
#!/usr/bin/perl
use strict;
use warnings;
use Encode;
use LWP::Simple;
use XML::RSS;
use Time::Local;
use HTTP::Date;
use DateTime::Format::HTTP;
#binmode(STDOUT, ":utf8");
# RSS digest generator.
#
# Fetches each feed listed in @url, and for every feed emits an HTML fragment:
# an <h3> linking to the channel, a <p> with the channel description, and a
# <ul> of the most recent entries formatted as "Y.M.D - <a>title</a>".
# The concatenated fragment is printed to STDOUT as UTF-8 bytes.

# Feeds to aggregate: the site-wide feed followed by the per-section feeds.
my @url = (
    "https://pocolog.bass-world.net/feed/",
    "https://pocolog.bass-world.net/tech/feed/",
    "https://pocolog.bass-world.net/book/feed/",
    "https://pocolog.bass-world.net/gadgets/feed/",
    "https://pocolog.bass-world.net/music/feed/",
    "https://pocolog.bass-world.net/cars/feed/",
    "https://pocolog.bass-world.net/creditcard/feed/",
    "https://pocolog.bass-world.net/travel/feed/"
);

# Only this many leading entries of each feed are considered for the list.
my $max_items_per_feed = 5;

# Accumulates the HTML for all feeds; encoded and printed once at the end.
my $line = "";

foreach my $feed_url (@url) {
    # A feed that cannot be fetched still aborts the run (no partial output),
    # but the message now says which feed failed.
    my $document = LWP::Simple::get($feed_url)
        or die "Failed to fetch RSS feed: $feed_url\n";

    my $rss = XML::RSS->new;
    $rss->parse($document);

    # Feed text arrives with XML entities already decoded, so every value
    # must be re-escaped before it is placed into HTML text or attributes.
    my $channel = $rss->channel;
    $line .= "<h3><a href = '" . _escape_html($channel->{link}) . "'>"
           . _escape_html($channel->{title}) . "</a></h3>\n";
    $line .= "<p>" . _escape_html($channel->{description}) . "</p>\n";
    $line .= "<ul>\n";

    # Restrict to the first $max_items_per_feed entries of the feed.
    my @items = @{ $rss->{items} || [] };
    if (@items > $max_items_per_feed) {
        $#items = $max_items_per_feed - 1;
    }

    ITEM:
    for my $item (@items) {
        next ITEM unless $item;

        # An entry without a date is skipped rather than ending the list,
        # so later dated entries of the same feed are still shown.
        my $pub_date = $item->{pubDate};
        next ITEM unless $pub_date;

        # parse_datetime croaks on malformed input; one bad date should not
        # take down the whole digest.
        my $dt = eval { DateTime::Format::HTTP->parse_datetime($pub_date) };
        if (!defined $dt) {
            warn "Skipping entry with unparseable pubDate '$pub_date' in $feed_url\n";
            next ITEM;
        }

        # Show the publication date as a calendar date in Japan.
        $dt->set_time_zone('Asia/Tokyo');
        my $date = join '.', $dt->year, $dt->month, $dt->day;

        $line .= "<li>" . $date;
        $line .= " - ";
        $line .= "<a href = '" . _escape_html($item->{link}) . "'>";
        $line .= _escape_html($item->{title}) . "</a></li>\n";
    }

    $line .= "</ul>\n";
}

print encode('utf-8', $line);

# Escape a string for safe use in HTML text and in quoted attribute values.
# Takes one scalar (undef is treated as the empty string) and returns a copy
# with & < > " ' replaced by character references.
sub _escape_html {
    my ($text) = @_;
    return '' unless defined $text;

    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;

    return $text;
}
#open(BLOG, ">", "./www/blogdata.html");
#print BLOG $line;
#close(BLOG);
   