#!/usr/bin/perl -w
#
# Standalone version of Adsense Extended Reports:
#  http://ejelta.com/wmtools/adsense/
#
# Copyright (c) 2003-2005 Andrew Maltsev
#
# Freely redistributable in original form, contact the author if not sure.
# If you build a web accessible service based on this script, please
# link back to the script's page at:
#  http://ejelta.com/wmtools/adsense/script/
###############################################################################
use strict;
use Getopt::Long;
use LWP::UserAgent;
use HTTP::Cookies;
use Unicode::Lite;
use Text::CSV;

# Forward declaration: t2hq carries a prototype and is defined at the end of
# the file, but is called from adsense_report_retrieve above its definition.
sub t2hq ($);

###############################################################################

my $output_csv;
my $from_file;
my $random;
my $period = 'last7days';

my $gorc = GetOptions(
    'output-csv'  => \$output_csv,
    'from-file=s' => \$from_file,
    'period=s'    => \$period,
    'random'      => \$random,
);

# Either a saved report file, the random demo mode, or exactly two
# positional arguments (username and password) are required.
if (!$gorc || (!$from_file && !$random && @ARGV != 2)) {
    # NOTE(review): the usage text below was reconstructed from the option
    # spec above; only the final sentence survived in the damaged source.
    # Compare against the upstream script if exact wording matters.
    print <<EOT;
Usage: $0 [--output-csv] [--period=PERIOD] USERNAME PASSWORD
       $0 [--output-csv] --from-file=FILE
       $0 [--output-csv] --random

Freely redistributable in original form, contact the author if not sure.
EOT
    exit 1;
}

##
# Retrieving if required
#
my $days;
if ($from_file) {
    # Three-argument open with a lexical handle; the file is expected to hold
    # a previously saved HTML report.
    open(my $fh, '<', $from_file) or die "Can't open $from_file: $!\n";
    my $text = join('', <$fh>);
    close($fh);
    $days = adsense_report_parse($text);
}
elsif ($random) {
    # Seven days of made-up numbers, keyed 20040313 .. 20040319.
    for my $day (13 .. 19) {
        $days->{"200403$day"} = {
            impr_number  => int(rand(10000) + 90000),
            click_number => int(rand(300) + 900),
            earn_amount  => int(rand(300) + 100),
        };
    }
}
else {
    my $text = adsense_report_retrieve(
        username => $ARGV[0],
        password => $ARGV[1],
        csv      => 1,
    );
    $days = adsense_report_parse_csv($text);
}

##
# Output, one row per day in ascending date order (keys are YYYYMMDD).
#
if ($output_csv) {
    my $csv = Text::CSV->new;
    $csv->combine('Date', 'Impressions', 'Clicks', 'CTR', 'Earnings', 'PPC', 'EPM', 'Impr/$100');
    print $csv->string, "\n";

    foreach my $date (sort { $a <=> $b } keys %$days) {
        my $d = $days->{$date};
        adsense_calc_fields($d);

        $csv->combine(
            $date,
            $d->{impr_number},
            sprintf('%u',   $d->{click_number}),
            sprintf('%.2f', $d->{impr_number} ? 100 * $d->{click_number} / $d->{impr_number} : 0),
            sprintf('%.2f', $d->{earn_amount}),
            sprintf('%.2f', $d->{pp_click}),
            sprintf('%.2f', $d->{pp_1000_impr}),
            sprintf('%u',   $d->{impr_100bucks}),
        );
        print $csv->string, "\n";
    }
}
else {
    # Header widths mirror the data row format below so the columns line up.
    printf "%-8s %7s %6s %6s %8s %5s %5s\n",
        qw(Date Impr. Clicks CTR Earnings PPC EPM);

    foreach my $date (sort { $a <=> $b } keys %$days) {
        my $d = $days->{$date};
        adsense_calc_fields($d);

        printf "%s %7u %6u %5.2f%% %8.2f %5.2f %5.2f\n",
            $date,
            $d->{impr_number},
            $d->{click_number},
            $d->{impr_number} ? 100 * $d->{click_number} / $d->{impr_number} : 0,
            $d->{earn_amount},
            $d->{pp_click},
            $d->{pp_1000_impr};
    }
}

exit(0);

###############################################################################

# Adds derived fields to a per-day hash reference, in place:
#   ctr            clicks / impressions (a fraction, not a percentage)
#   pp_1000_impr   earnings per 1000 impressions
#   pp_click       earnings per click
#   impr_100bucks  impressions needed to earn 100 at the day's rate
# Any field whose divisor is zero is set to 0.
#
sub adsense_calc_fields {
    my $d = shift;

    if (0 + $d->{impr_number}) {
        $d->{ctr}          = $d->{click_number} / $d->{impr_number};
        $d->{pp_1000_impr} = 1000 * $d->{earn_amount} / $d->{impr_number};
    }
    else {
        $d->{ctr}          = 0;
        $d->{pp_1000_impr} = 0;
    }

    if (0 + $d->{click_number}) {
        $d->{pp_click} = $d->{earn_amount} / $d->{click_number};
    }
    else {
        $d->{pp_click} = 0;
    }

    if (0 + $d->{earn_amount}) {
        $d->{impr_100bucks} = 100 / $d->{earn_amount} * $d->{impr_number};
    }
    else {
        $d->{impr_100bucks} = 0;
    }

    return;
}

###############################################################################

# Logs in to AdSense and downloads the aggregate report for the period held
# in the file-level $period variable.
#
# Arguments (hash or hash reference):
#   username  required
#   password  required
#   csv       true to request the TSV_EXCEL format, false for HTML
#
# Returns the report body as a string; bodies declared as UTF-16 or UTF-8 in
# the Content-Type header are converted to latin1 first.
# Dies on missing arguments, HTTP failures, a rejected login, or a landing
# page that does not mention "Quick Reports".
#
sub adsense_report_retrieve {
    my $args = ref($_[0]) ? $_[0] : { @_ };

    my $username = $args->{username} || die "adsense_report_retrieve - no 'username'";
    my $password = $args->{password} || die "adsense_report_retrieve - no 'password'";

    my $ua   = LWP::UserAgent->new();
    my $cjar = HTTP::Cookies->new();
    $ua->cookie_jar($cjar);
    $ua->agent('Mozilla/4.0 (compatible; AdSense Extended Stats; http://ejelta.com/wmtools/adsense/)');
    $ua->requests_redirectable([qw(GET HEAD POST)]);

    # First request only picks up the session cookies.
    my $qstr = 'https://www.google.com/adsense/default';
    my $req  = HTTP::Request->new(GET => $qstr);
    my $res  = $ua->request($req);
    $res->is_success ||
        die "adsense_report_retrieve - HTTP error ($qstr), " . $res->code . ' ' . $res->message;

    my $login_url = 'https://www.google.com/adsense/login.do';
    $res = $ua->post($login_url, [
        'destination' => '',
        'username'    => $username,
        'password'    => $password,
        'null'        => 'Login',
    ]);
    # Report the URL that actually failed, not the previous one.
    $res->is_success ||
        die "adsense_report_retrieve - HTTP error ($login_url), " . $res->code . ' ' . $res->message;

    my $content = $res->content;
    if ($content =~ /Invalid/ && $content =~ /login.do/) {
        die "adsense_report_retrieve - Bad password";
    }
    if ($content !~ /Quick\s*Reports/i) {
        # Dump the unexpected page to STDERR to help diagnose layout changes.
        print STDERR $content;
        die "AdSense report page has unexpected content, this script is too old";
    }

    # $period comes from the command line, so escape it before it goes into
    # the query string.
    $qstr = 'https://www.google.com/adsense/report/aggregate?'
          . 'product=afc&'
          . 'dateRange.dateRangeType=simple&'
          . 'dateRange.simpleDate=' . t2hq($period) . '&'
          . 'reportType=property&'
          . 'groupByPref=date&'
          . 'unitPref=page';

    if ($args->{'csv'}) {
        $qstr .= '&outputFormat=TSV_EXCEL';
    }
    else {
        $qstr .= '&outputFormat=HTML';
    }

    $req = HTTP::Request->new(GET => $qstr);
    $res = $ua->request($req);
    $res->is_success ||
        die "adsense_report_retrieve - HTTP error ($qstr), " . $res->code . ' ' . $res->message;

    $content = $res->content;

    # The header may be absent; avoid matching against undef under -w.
    my $ctype = $res->header('Content-Type');
    $ctype = '' unless defined $ctype;

    if ($ctype =~ m/utf-16/i) {
        $content = convert('utf16', 'latin1', $content);
    }
    elsif ($ctype =~ m/utf-8/i) {
        $content = convert('utf8', 'latin1', $content);
    }

    return $content;
}

###############################################################################

# Parses the tab-separated report text into a hash reference keyed by
# YYYYMMDD, each value holding impr_number, click_number and earn_amount.
#
# The text must start with "Date<TAB>Page". The first row is skipped as the
# header and parsing stops at the first row containing "Total".
# Dies when the reported CTR differs from clicks/impressions by more than
# 0.1, or when a date cannot be parsed.
#
sub adsense_report_parse_csv {
    my $text = shift;

    $text =~ /^Date\tPage/ ||
        die "adsense_report_parse - does not look like an AdSense report";

    my @rows = split(/[\r\n]+/, $text);
    shift @rows;

    my %days;
    foreach my $row (@rows) {
        last if $row =~ /Total/;

        # $cpm is read only to keep the column positions right.
        my ($date, $impr, $clicks, $ctr, $cpm, $earnings) = split(/\t/, $row);

        # Strip thousands separators, whitespace and unit symbols.
        $impr     =~ s/[\s,]+//g;
        $clicks   =~ s/[\s,]+//g;
        $ctr      =~ s/[\s\%,]+//g;
        $earnings =~ s/[\s\$,]+//g;

        # Sanity check: the CTR column must agree with the raw counts.
        my $calc_ctr = sprintf('%.4f', $impr ? $clicks / $impr * 100 : 0);
        if (abs($calc_ctr - $ctr) > 0.1) {
            die "adsense_report_parse - CTR does not match clicks/impressions ($clicks/$impr - expected $calc_ctr, got $ctr)";
        }

        $date =~ m/^\s*(\d+)[\/-](\d+)[\/-](\d+)\s*$/ ||
            die "Unparsable date '$date'";

        # A first component above 2000 means year-first order; otherwise the
        # date is taken as month/day/year.
        my ($mnum, $mday, $year) = $1 > 2000 ? ($2, $3, $1) : ($1, $2, $3);
        $year += 2000 if $year < 100;

        if (!$mnum) {
            die "Unparsable date '$date'";
        }
        if ($year < 2000 || $year > 2999) {
            die "Unparsable year '$year'";
        }

        $days{sprintf('%04u%02u%02u', $year, $mnum, $mday)} = {
            impr_number  => $impr,
            click_number => $clicks,
            earn_amount  => $earnings,
        };
    }

    return \%days;
}

###############################################################################

# Parses a saved HTML report into a hash reference keyed by YYYYMMDD, each
# value holding impr_number, click_number and earn_amount.
#
# The text must contain "AdSense". Reports containing
# class="aggregateimpressions" are treated as the old five-column layout;
# all others as the six-column layout that includes a CPM column.
# Dies on a CTR mismatch above 0.1 or an unparsable date.
#
# NOTE(review): the row and cell patterns below appear to have lost their
# HTML tag markup (angle-bracket content was stripped from this copy of the
# file). They are reproduced exactly as found and will NOT match real
# reports until restored from the upstream script.
#
sub adsense_report_parse {
    my $text = shift;

    $text =~ m/AdSense/s ||
        die "adsense_report_parse - does not look like an AdSense report";

    my $old_style = ($text =~ m/class="aggregateimpressions"/i) ? 1 : 0;

    my @mlist = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    my %days;
    my @rows = ($text =~ m{( .*? ) }xgis);

    foreach my $str (@rows) {
        my ($date, $impr, $clicks, $ctr, $cpm, $earnings);

        if ($old_style) {
            ($date, $impr, $clicks, $ctr, $earnings) = ($str =~ m{
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
            }xis);
        }
        else {
            ($date, $impr, $clicks, $ctr, $cpm, $earnings) = ($str =~ m{
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
                \s+
                \s+(.*?)\s+
            }xis);
        }

        # Strip thousands separators, whitespace and unit symbols.
        $impr     =~ s/[\s,]+//g;
        $clicks   =~ s/[\s,]+//g;
        $ctr      =~ s/[\s\%,]+//g;
        $earnings =~ s/[\s\$,]+//g;

        # Sanity check: the CTR column must agree with the raw counts.
        my $calc_ctr = sprintf('%.4f', $impr ? $clicks / $impr * 100 : 0);
        if (abs($calc_ctr - $ctr) > 0.1) {
            die "adsense_report_parse - CTR does not match clicks/impressions ($clicks/$impr - expected $calc_ctr, got $ctr)";
        }

        # Dates look like "Weekday, Month D, YYYY"; every weekday accepts
        # both its three-letter and its longer form.
        my ($wday, $month, $mday, $year) = ($date =~ m{
            ^
            (Mon\w*|Tue\w*|Wed\w*|Thu\w*|Fri\w*|Sat\w*|Sun\w*),\s+
            (\w+)\s+
            (\d+),\s+
            (\d+)
            $
        }xis);

        # Month names are matched by their first three letters.
        my $mnum;
        if (defined $month) {
            for my $i (0 .. $#mlist) {
                if ($month =~ m/^$mlist[$i]/i) {
                    $mnum = $i + 1;
                    last;
                }
            }
        }

        if (!$mnum) {
            die "Unparsable date '$date'";
        }
        if ($year < 2000) {
            die "Unparsable year '$year'";
        }

        ### dprint "date=$mnum/$mday/$year impr=$impr clicks=$clicks ctr=$ctr cctr=$calc_ctr earnings=$earnings\n";

        $days{sprintf('%04u%02u%02u', $year, $mnum, $mday)} = {
            impr_number  => $impr,
            click_number => $clicks,
            earn_amount  => $earnings,
        };
    }

    return \%days;
}

###############################################################################

# Percent-encodes a string for use inside a URL query: control characters
# and space (0x00-0x20), bytes 0x80-0xff, and the characters & ? < > ; " = % # +
# are each replaced by "%" followed by two lowercase hex digits.
# Returns the encoded copy; the argument is not modified.
#
sub t2hq ($) {
    my $text = shift;
    $text =~ s/([\x00-\x20\x80-\xff\&\?<>;"=%#+])/"%".unpack("H2",$1)/sge;
    return $text;
}

###############################################################################