#!/usr/bin/perl # # FetchEventData # # Fetch waveforms from pre-assembled event gathers. # The default web service are from the NCEDC, other web services may be # specified by setting the following environment variables: # # SERVICEBASE = the base URI of the service(s) to use (https://service.ncedc.org/) # TIMESERIESWS = complete URI of service (https://service.ncedc.org/ncedcws/eventdata/1) # # This program is written to select and fetch waveform data # from pre-assembled collection of waveforms for events. # # Dependencies: This script should run without problems on Perl # release 5.10 or newer, older versions of Perl might require the # installation of the following modules (and their dependencies): # Bundle::LWP (libwww-perl) # ## Data selection # # Data is generally selected by specifying network, station, location, # channel, quality, start time and end time. The name parameters may # contain wildcard characters. All input options are optional but # waveform requests should include a time window. Data may be # selected one of three ways: # # 1) Command line arguments: -N, -S, -L, -C, -Q, -s, -e # # 2) A BREQ_FAST formatted file, http://www.iris.edu/manuals/breq_fast.htm # # 3) A selection file containing a list of: # Net Sta Loc Chan Start End # # Example selection file contents: # II BFO 00 BHZ 2011-01-01T00:00:00 2011-01-01T01:00:00 # IU ANMO 00 BHZ 2011-01-01T00:00:00 2011-01-01T01:00:00 # IU COLA 00 BHZ 2011-01-01T00:00:00 2011-01-01T01:00:00 # # For the command line arguments and the selection file the network, # station location and channel fields may contain the common * and ? # wildcards, meaning zero-to-many and a single character respectively. # These fields may also be comma-separated lists, for example, the # network may be specified as II,IU,TA to select three networks. # ## Data output # # miniSEED: waveform data will be requested based on the selection and all # written to the single file. # # 2013.341 # - Initial coding by Doug Neuhauser, UC Berkeley Seismological Laboratory, # based on FetchData by Chad Trabant, IRIS Data Management Center, use strict; use File::Basename; use Getopt::Long; use LWP::UserAgent; use HTTP::Status qw(status_message); use HTTP::Date; use Time::HiRes; my $version = "2013.341"; my $scriptname = basename($0); # Default web service base my $servicebase = 'https://service.ncedc.org'; # Check for environment variable overrides for servicebase $servicebase = $ENV{'SERVICEBASE'} if ( exists $ENV{'SERVICEBASE'} ); # Web service for time series data my $timeseriesservice = "$servicebase/ncedcws/eventdata/1"; # Check for environment variable override for timeseriesservice $timeseriesservice = $ENV{'TIMESERIESWS'} if ( exists $ENV{'TIMESERIESWS'} ); my $useragent = "$scriptname/$version Perl/$] " . new LWP::UserAgent->_agent; my $usage = undef; my $verbose = undef; my $nobsprint = undef; my $eventid = undef; my $catalog = undef; my $net = undef; my $sta = undef; my $loc = undef; my $chan = undef; my $qual = "B"; my $starttime = undef; my $endtime = undef; my $selectfile = undef; my $bfastfile = undef; my $mslopt = undef; my $lsoopt = undef; my $appname = undef; my $outfile = undef; my $auth = undef; # Parse command line arguments Getopt::Long::Configure ("bundling_override"); my $getoptsret = GetOptions ( 'help|usage|h' => \$usage, 'verbose|v+' => \$verbose, 'nobs' => \$nobsprint, 'eventid|E=s' => \$eventid, 'catalog|c=s' => \$catalog, 'net|N=s' => \$net, 'sta|S=s' => \$sta, 'loc|L=s' => \$loc, 'chan|C=s' => \$chan, 'qual|Q=s' => \$qual, 'starttime|s=s' => \$starttime, 'endtime|e=s' => \$endtime, 'selectfile|l=s' => \$selectfile, 'bfastfile|b=s' => \$bfastfile, 'msl=s' => \$mslopt, 'lso' => \$lsoopt, 'appname|A=s' => \$appname, 'outfile|o=s' => \$outfile, 'timeseriesws=s' => \$timeseriesservice, ); my $required = ( defined $eventid || defined $selectfile ); if ( ! $getoptsret || $usage || ! $required ) { print "$scriptname: collect time series and related metadata (version $version)\n"; #:: print "https://service.ncedc.org/clients/\n\n"; print "Usage: $scriptname [options]\n\n"; print " Options:\n"; print " -v Increase verbosity, may be specified multiple times\n"; print " -E,--eventid Eventid\n"; print " -c,--catalog catalog for eventid\n"; print " -N,--net Network code, list and wildcards (* and ?) accepted\n"; print " -S,--sta Station code, list and wildcards (* and ?) accepted\n"; print " -L,--loc Location ID, list and wildcards (* and ?) accepted\n"; print " -C,--chan Channel codes, list and wildcards (* and ?) accepted\n"; print " -Q,--qual Quality indicator, default is best\n"; print " -s starttime Specify start time (YYYY-MM-DD,HH:MM:SS.ssssss)\n"; print " -e endtime Specify end time (YYYY-MM-DD,HH:MM:SS.ssssss)\n"; print " -l listfile Read list of selections from file\n"; print " -b bfastfile Read list of selections from BREQ_FAST file\n"; print " -msl length Limit returned data to a minimum segment length\n"; print " -lso Limit returned data to the longest segment only\n"; print " -A appname Application/version string for identification\n"; print "\n"; print " -o outfile Fetch time series data and write to output file\n"; print "\n"; exit 1; } if ( ! $outfile ) { die "No output options specified, try -h for usage information\n"; } # Print script name and local time string if ( $verbose ) { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); printf STDERR "$scriptname ($version) at %4d-%02d-%02d %02d:%02d:%02d\n", $year+1900, $mon+1, $mday, $hour, $min, $sec; } # Check for eventid. if ( $outfile && ( ! defined $eventid ) ) { die "Cannot request event timeseries data without an eventid\n"; } # Normalize time strings given on the command line if ( $starttime ) { my ($year,$month,$mday,$hour,$min,$sec,$subsec) = split (/[-:,.\s\/T]/, $starttime); $starttime = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d", $year, $month, $mday, $hour, $min, $sec); $starttime .= ".$subsec" if ( $subsec ); } if ( $endtime ) { my ($year,$month,$mday,$hour,$min,$sec,$subsec) = split (/[-:,.\s\/T]/, $endtime); $endtime = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d", $year, $month, $mday, $hour, $min, $sec); $endtime .= ".$subsec" if ( $subsec ); } # An array to hold data selections my @selections = (); # Add command line selection to list if ( defined $net || defined $sta || defined $loc || defined $chan || defined $starttime || defined $endtime ) { push (@selections,"$net|$sta|$loc|$chan|$starttime|$endtime"); } # Read selection list file if ( $selectfile ) { print STDERR "Reading data selection from list file '$selectfile'\n"; &ReadSelectFile ($selectfile); } # Read BREQ_FAST file if ( $bfastfile ) { print STDERR "Reading data selection from BREQ_FAST file '$bfastfile'\n"; &ReadBFastFile ($bfastfile); } # Report complete data selections if ( $verbose > 2 ) { print STDERR "== Data selections ==\n"; foreach my $select ( @selections ) { print STDERR " $select\n"; } } # An array to hold channel list and metadata my %request = (); # Value is metadata range for selection # Build request hash directly from selections foreach my $selection ( @selections ) { my ($snet,$ssta,$sloc,$schan,$sstart,$send) = split (/\|/,$selection); # Subsitute non-specified fields with wildcards $snet = "*" if ( ! $snet ); $ssta = "*" if ( ! $ssta ); $sloc = "*" if ( ! $sloc ); $schan = "*" if ( ! $schan ); $request{"$snet|$ssta|$sloc|$schan|$sstart|$send"} = "$sstart|$send"; } # Report complete data request if ( $verbose > 2 ) { print STDERR "== Request list ==\n"; foreach my $req ( sort keys %request ) { print STDERR " $req (metadata: $request{$req})\n"; } } # Track bytes downloaded in callback handlers my $datasize = 0; # Fetch time series data if output file specified &FetchTimeSeriesData() if ( $outfile ); my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); printf STDERR "DONE at %4d-%02d-%02d %02d:%02d:%02d\n", $year+1900, $mon+1, $mday, $hour, $min, $sec; ## End of main ###################################################################### # ReadSelectFile: # # Read selection list file and add entries to the @selections array. # # Selection lines are expected to be in the following form: # # "Net Sta Loc Chan Start End" # # The Net, Sta, Loc and Channel fields are required and can be # specified as wildcards. Start and End are optional. ###################################################################### sub ReadSelectFile { my $selectfile = shift; open (SF, "<$selectfile") || die "Cannot open '$selectfile': $!\n"; foreach my $line ( ) { chomp $line; next if ( $line =~ /^\#/ ); # Skip comment lines my ($net,$sta,$loc,$chan,$start,$end) = split (' ', $line); next if ( ! defined $chan ); # Normalize time strings if ( $start ) { my ($year,$month,$mday,$hour,$min,$sec,$subsec) = split (/[-:,.\s\/T]/, $start); $start = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d", $year, $month, $mday, $hour, $min, $sec); $start .= ".$subsec" if ( $subsec ); } if ( $end ) { my ($year,$month,$mday,$hour,$min,$sec,$subsec) = split (/[-:,.\s\/T]/, $end); $end = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d", $year, $month, $mday, $hour, $min, $sec); $end .= ".$subsec" if ( $subsec ); } # Add selection to global list push (@selections,"$net|$sta|$loc|$chan|$start|$end"); } close SF; } # End of ReadSelectFile() ###################################################################### # ReadBFastFile: # # Read BREQ_FAST file and add entries to the @selections array. # ###################################################################### sub ReadBFastFile { my $bfastfile = shift; open (BF, "<$bfastfile") || die "Cannot open '$bfastfile': $!\n"; my $linecount = 0; BFLINE: foreach my $line ( ) { chomp $line; $linecount++; next if ( ! $line ); # Skip empty lines # Capture .QUALTIY header if ( $line =~ /^\.QUALITY .*$/ ) { ($qual) = $line =~ /^\.QUALITY ([DRQMBE])/; next; } next if ( $line =~ /^\./ ); # Skip other header lines my ($sta,$net,$syear,$smon,$sday,$shour,$smin,$ssec,$eyear,$emon,$eday,$ehour,$emin,$esec,$count,@chans) = split (' ', $line); # Simple validation of BREQ FAST fields if ( $sta !~ /^[A-Za-z0-9*?]{1,5}$/ ) { print "Unrecognized station code: '$sta', skipping line $linecount\n" if ( $verbose ); next; } if ( $net !~ /^[-_A-Za-z0-9*?]+$/ ) { print "Unrecognized network code: '$net', skipping line $linecount\n" if ( $verbose ); next; } if ( $syear !~ /^\d\d\d\d$/ ) { print "Unrecognized start year: '$syear', skipping line $linecount\n" if ( $verbose ); next; } if ( $smon !~ /^\d{1,2}$/ ) { print "Unrecognized start month: '$smon', skipping line $linecount\n" if ( $verbose ); next; } if ( $sday !~ /^\d{1,2}$/ ) { print "Unrecognized start day: '$sday', skipping line $linecount\n" if ( $verbose ); next; } if ( $shour !~ /^\d{1,2}$/ ) { print "Unrecognized start hour: '$shour', skipping line $linecount\n" if ( $verbose ); next; } if ( $smin !~ /^\d{1,2}$/ ) { print "Unrecognized start min: '$smin', skipping line $linecount\n" if ( $verbose ); next; } if ( $ssec !~ /^\d{1,2}\.?\d{0,6}?$/ ) { print "Unrecognized start seconds: '$ssec', skipping line $linecount\n" if ( $verbose ); next; } if ( $eyear !~ /^\d\d\d\d$/ ) { print "Unrecognized end year: '$eyear', skipping line $linecount\n" if ( $verbose ); next; } if ( $emon !~ /^\d{1,2}$/ ) { print "Unrecognized end month: '$emon', skipping line $linecount\n" if ( $verbose ); next; } if ( $eday !~ /^\d{1,2}$/ ) { print "Unrecognized end day: '$eday', skipping line $linecount\n" if ( $verbose ); next; } if ( $ehour !~ /^\d{1,2}$/ ) { print "Unrecognized end hour: '$ehour', skipping line $linecount\n" if ( $verbose ); next; } if ( $emin !~ /^\d{1,2}$/ ) { print "Unrecognized end min: '$emin', skipping line $linecount\n" if ( $verbose ); next; } if ( $esec !~ /^\d{1,2}\.?\d{0,6}?$/ ) { print "Unrecognized end seconds: '$esec', skipping line $linecount\n" if ( $verbose ); next; } if ( $count !~ /^\d+$/ || $count <= 0 ) { print "Invalid channel count field: '$count', skipping line $linecount\n" if ( $verbose ); next; } if ( scalar @chans <= 0 ) { print "No channels specified, skipping line $linecount\n" if ( $verbose ); next; } # Extract location ID if present, i.e. if channel count is one less than present my $loc = undef; $loc = pop @chans if ( scalar @chans == ($count+1) ); if ( $loc && $loc !~ /^[A-Za-z0-9*?\-]{1,2}$/ ) { print "Unrecognized location ID: '$loc', skipping line $linecount\n" if ( $verbose ); next; } foreach my $chan ( @chans ) { if ( $chan !~ /^[A-Za-z0-9*?]{3,3}$/ ) { print "Unrecognized channel codes: '$chan', skipping line $linecount\n" if ( $verbose ); next BFLINE; } } if ( scalar @chans != $count ) { printf "Channel count field ($count) does not match number of channels specified (%d), skipping line $linecount\n", scalar @chans if ( $verbose ); next; } # Normalize time strings my ($ssec,$ssub) = split (/\./, $ssec); my $start = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d", $syear, $smon, $sday, $shour, $smin, $ssec); $start .= ".$ssub" if ( $ssub ); my ($esec,$esub) = split (/\./, $esec); my $end = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d", $eyear, $emon, $eday, $ehour, $emin, $esec); $end .= ".$esub" if ( $esub ); # Add selection to global list for each channel foreach my $chan ( @chans ) { push (@selections,"$net|$sta|$loc|$chan|$start|$end"); } } close BF; } # End of ReadBFastFile() ###################################################################### # FetchTimeSeriesData: # # Collect time series data for each entry in the %request hash. All # returned data is written to the global output file (outfile). # # The request list is separatated into groups where the group size is # defined in terms of station-days. If the request for a group fails # it will be retried, after too many failures. # ###################################################################### sub FetchTimeSeriesData { # Open output file open (OUT, ">$outfile") || die "Cannot open output file '$outfile': $!\n"; # Create HTTP user agent my $ua = RequestAgent->new(); $ua->env_proxy; my $count = 0; my @reqs; foreach my $req ( sort keys %request ) { my ($wnet,$wsta,$wloc,$wchan,$wstart,$wend) = split (/\|/, $req); push (@reqs, "$wnet $wsta $wloc $wchan $wstart $wend"); ++$count; } print STDERR "Fetching time series data ($count selections)\n" if ( $verbose ); my $ftime = Time::HiRes::time; my $totalbytes = 0; my $redocnt = 0; my $outoffset = 0; { # Create web service URI my $query = "query"; my $uri = "${timeseriesservice}/$query"; # Create POST data selection: specify options followed by selections my $postdata = "eventid=$eventid\n"; $postdata .= "catalog=$catalog\n" if ( defined $catalog ); $postdata .= "minimumlength=$mslopt\n" if ( defined $mslopt ); $postdata .= "longestonly=true\n" if ( defined $lsoopt ); #:: $postdata .= "quality=$qual\n"; foreach my $req ( @reqs ) { $postdata .= "$req\n"; } print STDERR "Time series URI: '$uri'\n" if ( $verbose > 1 ); print STDERR "Data selection (POST):\n$postdata" if ( $verbose > 1 ); print STDERR "Downloading time series data :: " if ( $verbose ); $datasize = 0; # Fetch time series data from web service using callback routine my $response = $ua->post($uri, Content => $postdata, ':content_cb' => \&DLCallBack ); if ( $response->code == 204 ) { print (STDERR "No data available\n") if ( $verbose ); } elsif ( $response->code == 401 ) { print (STDERR "AUTHORIZATION FAILED, username and password not recognized\n"); last; } elsif ( ! $response->is_success() ) { print (STDERR "Error fetching data: " . $response->code . " :: " . status_message($response->code) . "\n"); print STDERR "------\n" . $response->decoded_content . "\n------\n"; print STDERR " URI: '$uri'\n" if ( $verbose > 1 ); # For real output files rewind position to the end of the last group data seek (OUT, $outoffset, 0) if ( $outfile ne "-" ); # Retry in 10 seconds or give up if already tried 60 times. if ( $response->code != 400 && $redocnt < 60 ) { print STDERR "Retrying request in 10 seconds\n"; sleep 10; $redocnt++; goto REDOGROUP; } else { print STDERR "Too many retries, giving up.\n"; last; } } else { printf (STDERR "%s\n", ($nobsprint)?sizestring($datasize):"") if ( $verbose ); } $outoffset = tell (OUT); $totalbytes += $datasize; } close OUT; my $duration = Time::HiRes::time - $ftime; my $rate = $totalbytes/(($duration)?$duration:0.000001); printf (STDERR "Received %s of time series data for event %s in %.1f seconds (%s/s)\n", sizestring($totalbytes), $eventid, $duration, sizestring($rate)); # Remove empty file unlink $outfile if ( -z $outfile ); } # End of FetchTimeSeriesData ###################################################################### # DLCallBack: # # A call back for LWP downloading. # # Write received data to output file, tally up the received data size # and print and updated (overwriting) byte count string. ###################################################################### sub DLCallBack { my ($data, $response, $protocol) = @_; print OUT $data; $datasize += length($data); if ( $verbose && ! $nobsprint ) { printf (STDERR "%-10.10s\b\b\b\b\b\b\b\b\b\b", sizestring($datasize)); } } ###################################################################### # sizestring (bytes): # # Return a clean size string for a given byte count. ###################################################################### sub sizestring { # sizestring (bytes) my $bytes = shift; if ( $bytes < 1000 ) { return sprintf "%d Bytes", $bytes; } elsif ( ($bytes / 1024) < 1000 ) { return sprintf "%.1f KB", $bytes / 1024; } elsif ( ($bytes / 1024 / 1024) < 1000 ) { return sprintf "%.1f MB", $bytes / 1024 / 1024; } elsif ( ($bytes / 1024 / 1024 / 1024) < 1000 ) { return sprintf "%.1f GB", $bytes / 1024 / 1024 / 1024; } elsif ( ($bytes / 1024 / 1024 / 1024 / 1024) < 1000 ) { return sprintf "%.1f TB", $bytes / 1024 / 1024 / 1024 / 1024; } else { return ""; } } # End of sizestring() ###################################################################### # # Package RequestAgent: a superclass for LWP::UserAgent with override # of LWP::UserAgent methods to set default user agent and handle # authentication credentials. # ###################################################################### BEGIN { use LWP; package RequestAgent; our @ISA = qw(LWP::UserAgent); sub new { my $self = LWP::UserAgent::new(@_); my $fulluseragent = $useragent; $fulluseragent .= " ($appname)" if ( $appname ); $self->agent($fulluseragent); $self; } sub get_basic_credentials { my ($self, $realm, $uri) = @_; if ( defined $auth ) { return split(':', $auth, 2); } elsif (-t) { my $netloc = $uri->host_port; print "\n"; print "Enter username for $realm at $netloc: "; my $user = ; chomp($user); return (undef, undef) unless length $user; print "Password: "; system("stty -echo"); my $password = ; system("stty echo"); print "\n"; # because we disabled echo chomp($password); return ($user, $password); } else { return (undef, undef) } } } # End of LWP::UserAgent override