#!/usr/bin/perl # $Id: get_streams,v 2.4 2005/07/18 20:45:41 dave Exp $ =head1 NAME get_streams =head1 DESCRIPTION Program to spider the BBC Radio web site and look for all of the Real Audio programme streams. The streams that are found are then written to an YAML file for later processing elsewhere. =cut use strict; use warnings; require BBCRobot; use HTML::TreeBuilder; use URI; use YAML qw(DumpFile); my $yaml_file = shift || 'streams.yaml'; my $start_url = 'http://www.bbc.co.uk/radio/aod/index_noframes.shtml'; my $ua = BBCRobot->new; my $doc = HTML::TreeBuilder->new; $doc->parse(get($start_url)); my @stations; my $div = $doc->look_down(_tag => 'div', id => 'network'); die "No stations found\n" unless $div; foreach ($div->find('a')) { my $a = a_href($_, $start_url); warn "Getting $a->{text}\n"; my $station = get_station($a->{href}->as_string, lc $a->{text}); $station->{name} = $a->{text}; $station->{url} = $a->{href}->as_string; push @stations, $station; last if $ENV{BBC_STREAMS_DEBUG} and @stations >= 2; } DumpFile $yaml_file, \@stations; sub get { my $url = shift; my $r = $ua->get($url); die "$url => " . $r->status_line if $r->is_error; return $r->content; } sub get_station { my $url = shift; my $name = shift; my $doc = HTML::TreeBuilder->new; $doc->parse(get($url)); my $station; my $live_div = $doc->look_down(_tag => 'div', id => 'livelinks'); if ($live_div) { foreach ($live_div->find('a')) { push @{$station->{live}}, get_streams($_, $url); } } else { warn "No live links found at $url\n"; } my $az_div = $doc->look_down(_tag => 'div', id => 'az'); if ($az_div) { foreach my $prog ($az_div->find('li')) { warn " Getting " . $prog->as_text . "\n"; foreach ($prog->find('a')) { my $bcast = get_streams($_, $url); if ($bcast->{text} =~ /^(SUN|MON|TUE|WED|THU|FRI|SAT)$/) { push @{$station->{az}[-1]{days}}, $bcast; } else { push @{$station->{az}}, $bcast; } } last if $ENV{BBC_STREAMS_DEBUG} and @{$station->{az}} >= 2; } } else { warn "No program links found at $url\n"; } return $station; } sub get_streams { my $elem = shift; my $base = shift; my $a = a_href($elem, $base); my $doc = HTML::TreeBuilder->new->parse(get($a->{href}->as_string)); my $emb = $doc->find('embed'); unless ($emb) { warn 'No ' . $a->{href}->as_string . "\n"; return; } my %attrs = $emb->all_attr; my $ram = get(URI->new_abs($attrs{src}, $base)->as_string); my @streams = $ram =~ /^\s*((?:rtsp|pnm):\S+)/mg; $a->{standalone} = $doc->as_HTML =~ /stand-alone Real Player/; $a->{radplay} = $a->{href}->as_string; $a->{ram} = URI->new_abs($attrs{src}, $base)->as_string; $a->{streams} = \@streams; $a->{href} = $a->{href}->as_string; return $a; } sub a_href { my $a = shift; my $base = shift; my %a = $a->all_attr; my $text = $a->as_text; $text =~ s/^\s+//; $text =~ s/\s+$//; return { href => URI->new_abs($a{href}, $base), text => $text }; } =head1 AUTHOR Dave Cross Edave@dave.org.ukE =head1 COPYRIGHT Copyright 2005, Dave Cross, All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut