Mercurial > hg > egg-tcls
view fetch_weather.pl @ 687:7f1a0d25aa40
fetch_weather: Cleanups, fix counting of FMI stations.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 15 Jun 2023 13:25:32 +0300 |
parents | 053d5662bb16 |
children | 8a6cca58e853 |
line wrap: on
line source
#!/usr/bin/perl -w ########################################################################## # # Fetch Weather v1.2.0 by Matti 'ccr' Hamalainen <ccr@tnsp.org> # (C) Copyright 2014-2021 Tecnic Software productions (TNSP) # This script is freely distributable under GNU GPL (version 2) license. # # Should be ran as a cronjob, and configured properly. # */10 * * * * perl -w /absolute/path/to/fetch_weather.pl /path/to/configfile # # Configuration file example is in "config.fetch_weather.example" # For debugging/testing, try ./fetch_weather.pl to see commandline options. # # Requires various Perl modules, in Debian the packages should be: # libwww-perl libxml-simple-perl libtimedate-perl libjson-perl # libfile-slurper-perl # ########################################################################## use 5.18.0; use strict; use warnings; use utf8; use Encode; use LWP::UserAgent; use HTTP::Message; use HTML::Entities; use Compress::Zlib; use XML::Simple; use Date::Format; use Date::Parse; use Data::Dumper; use File::Slurper qw(read_text write_binary); use JSON; ### ### Configuration settings ### my %settings = ( "force_update" => 0, "debug" => 0, "dump" => 0, "opt_fmi" => 0, "opt_tiehallinto" => 0, "purge_threshold" => 60, "outfile" => "", "http_user_agent" => "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 6.0) Opera 10.63 [en]", "tiehallinto_meta" => "tiehallinto.meta", "tiehallinto_meta_period" => 7, "tiehallinto_rw_url" => "https://tie.digitraffic.fi/api/v1/data/weather-data", "tiehallinto_meta_url" => "https://tie.digitraffic.fi/api/v1/metadata/weather-stations", "fmi_weather_base_url" => "https://opendata.fmi.fi/wfs", "fmi_weather_extra_params" => "&maxlocations=300&bbox=19,59,32,75", ); ### ### Helper functions ### sub mlog($) { print STDERR $_[0]; } sub fetch_http($) { my $agent = LWP::UserAgent->new; $agent->agent(opt_get("http_user_agent")); $agent->timeout(20); my $req = HTTP::Request->new(GET => $_[0]); $req->header('Accept-Encoding' => scalar HTTP::Message::decodable()); print STDERR "# FETCHING URL: ".$_[0]."\n" if (opt_get_int("debug") > 0); my $res = $agent->request($req); if (opt_get_int("debug") > 0) { print STDERR "# Response: ".$res->code.": ".$res->message."\n"; if ($res->code >= 200 && $res->code <= 201) { print STDERR "# Content-charset: ".$res->content_charset."\n". "# Content-encoding: ".$res->content_encoding."\n". "# Is decoded_content UTF8? ".(utf8::is_utf8($res->decoded_content) ? "yes" : "NO!")."\n"; } } return $res; } sub str_trim($) { my $tmp = $_[0]; $tmp =~ s/^\s*//; $tmp =~ s/\s*$//; return $tmp; } sub parse_timestamp($$) { my ($str, $offs) = @_; if ($str =~ /^(\d+):(\d+)$/) { return $offs + (60 * 60 * $1) + ($2 * 60); } else { return $offs; } } sub format_time_gmt($) { # 2012-02-27T00:00:00Z return time2str("%Y-%m-%dT%TZ", $_[0], "UTC"); } sub force_decode_utf8($) { if (!utf8::is_utf8($_[0])) { return decode("utf8", $_[0]); } else { return $_[0]; } } ### Return either data or if not defined, empty string sub plonk_data { return defined($_[0]) ? $_[0] : (defined($_[1]) ? $_[1] : ""); } ### Same as plonk_data() but also lowercase the data string sub plonk_data_lc($) { return defined($_[0]) ? lc($_[0]) : (defined($_[1]) ? $_[1] : ""); } ### ### Configuration handling ### sub opt_chk_bool($) { if (defined($settings{$_[0]})) { my $val = $settings{$_[0]}; return ($val == 1 || $val eq "true" || $val eq "on" || $val eq "1"); } else { return 0; } } sub opt_chk_valid($$) { if (defined($settings{$_[0]})) { my $val = $settings{$_[0]}; return length($val) >= $_[1]; } else { return 0; } } sub opt_get_int($) { if (defined($settings{$_[0]})) { return int($settings{$_[0]}); } else { return -1; } } sub opt_get($) { if (defined($settings{$_[0]})) { return $settings{$_[0]}; } else { return undef; } } sub opt_read_config($) { my $filename = $_[0]; my $errors = 0; my $line = 0; open(CONFFILE, "<", $filename) or die("Could not open configuration '".$filename."'!\n"); while (<CONFFILE>) { $line++; chomp; if (/(^\s*#|^\s*$)/) { # Ignore comments and empty lines } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*(\d+),?\s*$/) { my $key = lc($1); my $value = $2; if (defined($settings{$key})) { $settings{$key} = $value; } else { mlog("[$filename:$line] Unknown setting '$key' = $value\n"); $errors = 1; } } elsif (/^\s*\"?([a-zA-Z0-9_]+)\"?\s*=>?\s*\"(.*?)\",?\s*$/) { my $key = lc($1); my $value = $2; if (defined($settings{$key})) { $settings{$key} = $value; } else { mlog("[$filename:$line] Unknown setting '$key' = '$value'\n"); $errors = 1; } } else { mlog("[$filename:$line] Syntax error: $_\n"); $errors = 1; } } close(CONFFILE); return $errors; } ### ### Main program begins ### my $weatherdata = {}; my $opt_cfgfile; while (scalar(@ARGV) > 0) { my $arg = shift; if ($arg eq "-force") { $settings{"force_update"} = 1; } elsif ($arg eq "-debug") { $settings{"debug"} = 1; } elsif ($arg eq "-dump") { $settings{"dump"} = 1; } else { die("Configuration file already specified!\n") if defined($opt_cfgfile); $opt_cfgfile = $arg; } } if (!defined($opt_cfgfile)) { die( "Weather Fetch v1.0 by ccr/TNSP <ccr\@tnsp.org>\n". "Usage: $0 <config file> [options]\n". "\n". " -force : Force updating of all data\n". " -debug : Enable debug\n". " -dump : Dump received raw data\n". "\n" ); } opt_read_config($opt_cfgfile) == 0 or die("Errors while parsing configuration file '".$opt_cfgfile."'.\n"); print STDERR "Forcing update of all data.\n" if opt_chk_bool("force_update"); ### ### Load already cached data ### if (opt_chk_valid("outfile", 1) && !opt_chk_bool("force_update")) { my $filename = opt_get("outfile"); if (-e "$filename") { my $str = force_decode_utf8(read_text($filename)); if (defined($str)) { foreach my $line (split(/\s*\n\s*/, $str)) { my @mtmp = split(/\|/, $line, -1); if (scalar(\@mtmp) >= 3) { $weatherdata->{shift @mtmp} = \@mtmp; } } print STDERR scalar(keys %$weatherdata)." old records reloaded.\n" if (opt_get_int("debug") > 0); } } } ### ### Fetch Tiehallinto data ### if (opt_chk_bool("opt_tiehallinto")) { my $uri = opt_get("tiehallinto_rw_url"); print STDERR "Fetching Tiehallinto road weather data from ".$uri."\n" if (opt_get_int("debug") > 0); my $res = fetch_http($uri); if ($res->code >= 200 && $res->code <= 201) { my $json_str = force_decode_utf8($res->decoded_content); my $data = JSON->new->decode($json_str); print $json_str if opt_chk_bool("dump"); if (!defined($data->{"dataUpdatedTime"}) || !defined($data->{"weatherStations"})) { print STDERR "ERROR: REST/JSON call result did not contain required data.\n"; print STDERR $json_str."\n\n"; } else { # Check if we need to update the static meta data my $meta_file = opt_get("tiehallinto_meta"); my $meta_stamp = (-e $meta_file) ? (stat($meta_file))[9] : -1; my $fetch_meta = ($meta_stamp + 60*60*24 * opt_get_int("tiehallinto_meta_period")) < time(); # Fetch or read the cache my $meta_str; if ($fetch_meta || opt_chk_bool("force_update")) { my $uri = opt_get("tiehallinto_meta_url"); print STDERR "Fetching Tiehallinto static meta data from ".$uri."\n" if (opt_get_int("debug") > 1); my $res = fetch_http($uri); die("Failed to fetch $uri data.\n") unless ($res->code <= 200 && $res->code <= 201); $meta_str = force_decode_utf8($res->decoded_content); $fetch_meta = 1; } else { print STDERR "Using CACHED Tiehallinto static meta data from '".$meta_file."'.\n" if (opt_get_int("debug") > 0); $meta_str = force_decode_utf8(read_text($meta_file)); } print STDERR "Is meta_str UTF8? ".(utf8::is_utf8($meta_str) ? "yes" : "NO!")."\n" if (opt_get_int("debug") > 0); # Parse the data .. my $meta_data = {}; my $json = JSON->new->decode($meta_str); if ($fetch_meta) { # Save new cache, in more optimal form, if needed. print STDERR "Storing to cache '".$meta_file."'.\n" if (opt_get_int("debug") > 0); write_binary($meta_file, JSON->new->utf8->encode($json)); } foreach my $ms (@{$json->{"features"}}) { if (defined($ms->{"properties"}) && defined($ms->{"geometry"}{"coordinates"}) && defined($ms->{"properties"}{"names"}{"fi"})) { $meta_data->{$ms->{"id"}} = $ms; } } my $nrecords = 0; foreach my $wdata (@{$data->{"weatherStations"}}) { my $wid = $wdata->{"id"}; if (defined($meta_data->{$wid}) && defined($wdata->{"sensorValues"})) { $wdata->{"sensors"} = {}; foreach my $sensor (@{$wdata->{"sensorValues"}}) { $wdata->{"sensors"}->{$sensor->{"oldName"}} = $sensor; } $nrecords++; $weatherdata->{$meta_data->{$wid}{"properties"}{"names"}{"fi"}} = [ # Measurement source type 1, # Basic data plonk_data($meta_data->{$wid}{"geometry"}{"coordinates"}[1], 0), plonk_data($meta_data->{$wid}{"geometry"}{"coordinates"}[0], 0), plonk_data($meta_data->{$wid}{"geometry"}{"coordinates"}[2], 0), str2time(plonk_data($wdata->{"measuredTime"})), plonk_data($wdata->{"sensors"}->{"airtemperature1"}->{"sensorValue"}), plonk_data($wdata->{"sensors"}->{"humidity"}->{"sensorValue"}), plonk_data($wdata->{"sensors"}->{"averagewindspeed"}->{"sensorValue"}), plonk_data($wdata->{"sensors"}->{"winddirection"}->{"sensorValue"}), # Station type dependant data "", # unused for station type 1 plonk_data($wdata->{"sensors"}->{"roadsurfacetemperature1"}->{"sensorValue"}), plonk_data($wdata->{"sensors"}->{"precipitation"}->{"sensorValueDescriptionFi"}), plonk_data($wdata->{"sensors"}->{"visibility"}->{"sensorValue"}), plonk_data($wdata->{"sensors"}->{"precipitation"}->{"sensorValue"}), ]; } else { print STDERR "Station ID #".$wid." not defined?\n" if (opt_get_int("debug") > 0); #.Dumper($meta_data->{$wid}); } } print STDERR $nrecords." records from Tiehallinto.\n" if (opt_get_int("debug") > 0); } } } ### ### Fetch FMI data ### if (opt_chk_bool("opt_fmi")) { my @fmitems = ( "temperature", "humidity", "windspeedms", "winddirection", "totalcloudcover", ); my $uri = opt_get("fmi_weather_base_url"). "?request=getFeature". "&storedquery_id=fmi::observations::weather::multipointcoverage". "&starttime=".format_time_gmt(time() - 10*60)."&endtime=".format_time_gmt(time()). "¶meters=".join(",", @fmitems). opt_get("fmi_weather_extra_params"); print STDERR "FMI URI: ".$uri."\n" if (opt_get_int("debug") > 0); my $res = fetch_http($uri); if ($res->code >= 200 && $res->code <= 201) { my $xml = XMLin(force_decode_utf8($res->decoded_content)); my $time_base = time(); print $res->decoded_content if opt_chk_bool("dump"); if (defined($xml->{"wfs:member"}{"omso:GridSeriesObservation"})) { my $fdata = $xml->{"wfs:member"}{"omso:GridSeriesObservation"}; my $fshit = $fdata->{"om:result"}{"gmlcov:MultiPointCoverage"}; my @position_lines = split(/\n/, $fshit->{"gml:domainSet"}{"gmlcov:SimpleMultiPoint"}{"gmlcov:positions"}); my @data_lines = split(/\n/, $fshit->{"gml:rangeSet"}{"gml:DataBlock"}{"gml:doubleOrNilReasonTupleList"}); my @farray = (); if (scalar(@position_lines) == scalar(@data_lines)) { for (my $nline = 0; $nline < scalar(@position_lines); $nline++) { my $dline = str_trim($data_lines[$nline]); my $pline = str_trim($position_lines[$nline]); my @fmatches = ($dline =~ /\s*([\+\-]?\d+\.\d*|NaN)\s*/ig); if (scalar(@fmatches) != scalar(@fmitems)) { print STDERR "Not enough items in scalar line #".$nline." (". scalar(@fmatches). " vs ".scalar(@fmitems)."): ".$dline."\n" if (opt_get_int("debug") > 0); } else { my $vtmp = {}; for (my $fni = 0; $fni < scalar(@fmitems); $fni++) { $$vtmp{$fmitems[$fni]} = $fmatches[$fni] if (lc($fmatches[$fni]) ne "nan"); } if ($pline =~ /^\s*([\+\-]?\d+\.\d*)\s+([\+\-]?\d+\.\d*)\s+(\d+)\s*$/) { $$vtmp{"lat"} = $1; $$vtmp{"long"} = $2; $$vtmp{"time"} = $3; push(@farray, $vtmp); } else { print STDERR "Data mismatch #".$nline.": ".$pline."\n"; } } } } else { print STDERR "Position and data line counts do not match ". scalar(@position_lines)." <> ".scalar(@data_lines)."\n"; goto skip_it; } # XXX Hashify the array into lat/long keys # This is horrible :S my $nrecords = 0; foreach my $xnode (@{$fdata->{"om:featureOfInterest"}{"sams:SF_SpatialSamplingFeature"}{"sams:shape"}{"gml:MultiPoint"}{"gml:pointMember"}}) { my $floc = $xnode->{"gml:Point"}; if ($floc->{"gml:name"} ne "" && $floc->{"gml:pos"} =~ /^\s*([\+\-]?\d+\.\d*)\s+([\+\-]?\d+\.\d*)\s*$/) { my ($flat, $flong) = ($1, $2); # Should use a hash - foreach my $frec (@farray) { # If lat/long matches, and location is not yet defined, or # if timestamp is newer, store to location data if ($frec->{"lat"} == $flat && $frec->{"long"} == $flong && (!defined($weatherdata->{$floc->{"gml:name"}}) || $frec->{"time"} >= $weatherdata->{$floc->{"gml:name"}}[4]) ) { $nrecords++ unless defined($weatherdata->{$floc->{"gml:name"}}); $weatherdata->{$floc->{"gml:name"}} = [ # Measurement source type 2, # Basic data plonk_data($frec->{"lat"}, 0), plonk_data($frec->{"long"}, 0), 0, plonk_data($frec->{"time"}), plonk_data($frec->{"temperature"}), plonk_data($frec->{"humidity"}), plonk_data($frec->{"windspeedms"}), plonk_data($frec->{"winddirection"}), # Station type dependant data plonk_data($frec->{"totalcloudcover"}), ]; } } } } print STDERR $nrecords." records from FMI.\n" if (opt_get_int("debug") > 0); } else { # defined print STDERR "Invalid XML received:\n"; print STDERR $res->decoded_content."\n\n"; } } else { print STDERR "Error fetching FMI XML: ".$res->status_line."\n"; } } ### Skip here if the FMI shit fails due to broken data skip_it: ### ### Purge too old entries ### if (opt_chk_valid("purge_threshold", 1)) { my $purge = opt_get_int("purge_threshold"); if ($purge > 0) { my $wqtime = time(); my $nold = scalar(keys %$weatherdata); foreach my $key (keys %$weatherdata) { if ($wqtime - $weatherdata->{$key}[4] > (60 * $purge)) { delete $$weatherdata{$key}; } } my $nnew = scalar(keys %$weatherdata); print STDERR "Purged data older than ".$purge." minutes, ".$nold." -> ".$nnew." = ".($nold - $nnew)." removed.\n" if (opt_get_int("debug") > 0); } } ### ### Output ### if (opt_chk_valid("outfile", 1)) { my $filename = opt_get("outfile"); print STDERR "Dumping data to output file '".$filename."'\n" if (opt_get_int("debug") > 0); close(STDOUT); open(STDOUT, '>', $filename) or die("Could not open output file '".$filename."': $!\n"); } binmode STDOUT, ':encoding(utf-8)'; foreach my $key (sort { $a cmp $b } keys %$weatherdata) { print STDOUT $key."|".join("|", @{$weatherdata->{$key}})."\n"; } close(STDOUT);