#!/opt/perl/bin/perl
$^W = 1;	# cannot have -w on shebang line with /usr/bin/env
# $Id: stacov2x,v 1.24 2007/09/13 16:47:39 bjn Exp $
# -----------------------------------------------------------------------------
# Copyright 2004-2007, by the California Institute of Technology. ALL RIGHTS
# RESERVED. United States Government Sponsorship acknowledged. Any
# commercial use must be negotiated with the Office of Technology Transfer
# at the California Institute of Technology.
#
# This software may be subject to U.S. export control laws and regulations.
# By accepting this document, the user agrees to comply with all applicable
# U.S. export laws and regulations. User has the responsibility to obtain
# export licenses, or other export authority as may be required before
# exporting such information to foreign countries or providing access to
# foreign persons.
# -----------------------------------------------------------------------------

use strict;
use Cwd qw(getcwd chdir abs_path);
use File::Basename;
use File::Copy;
use Getopt::Long;
use Pod::Usage;

my $apply_scale_in_diff = 0;

@ARGV > 0 or pod2usage(-exitval => 2, -verbose => 0);

my $input_file;
my $ref_file;
my $output_file;
my $opt_escale    = 3.0; # Uncertainty scaling factor in project
my $opt_threshold = 7.8; # 95% CL for Chi-square with 3 DOF
my $opt_nc;
my $opt_noproject;
my $opt_apply,
my $opt_metrics;
my $opt_verbose;
my $opt_help;
my $opt_man;

GetOptions (
            "frame=s"     => \$ref_file,
            "input=s"     => \$input_file,
            "output=s"    => \$output_file,
            "escale=s"    => \$opt_escale,
            "threshold=s" => \$opt_threshold,
            "nc"          => \$opt_nc,
            "noproject"   => \$opt_noproject,
            "apply"       => \$opt_apply,
            "metrics"     => \$opt_metrics,
            "verbose"     => \$opt_verbose,
            "H"           => \$opt_help,
            "help"        => \$opt_help,
            "man"         => \$opt_man,
            ) or pod2usage(-exitval => 1, -verbose => 0);
        
pod2usage(-exitval => 2, -verbose => 1) if ($opt_help);
pod2usage(-exitval => 2, -verbose => 2) if ($opt_man);

## Required options
if (! defined $ref_file) {
    pod2usage(-msg => "Reference frame stacov file required",
              -exitval => 2, -verbose => 0);
}
if (! defined $input_file) {
    pod2usage(-msg => "Input stacov file required",
              -exitval => 2, -verbose => 0);
}
if (! defined $output_file) {
    pod2usage(-msg => "Output X-file name requiredd",
              -exitval => 2, -verbose => 0);
}

## Required input files
if (! -f $input_file) {
    die "ERROR: $input_file  not found\n";
}
if (! -f $ref_file) {
    die "ERROR: $ref_file  not found\n";
}

## Reoprt program version and threshold
system "ident $0 | grep Id";
print "RSQ threshold = ", $opt_threshold, "\n";

## Create work area in /tmp
my $prog = basename($0, "");
my $tempdir = '/tmp/'.$prog.'.'.$$;
my $cwd = getcwd;
if (! -e $tempdir) {
    mkdir $tempdir;
}
copy $ref_file, $tempdir;
$ref_file = basename($ref_file, "");
copy $input_file, $tempdir;
$input_file = basename($input_file, "");

chdir $tempdir;

## Get the epoch from the stacov header
my $bname = basename($input_file, ".stacov");
open IN, "< $input_file" or die "ERROR: $input_file: $!\n";
$_ = <IN>;
close IN;
@_ = split;
my $epoch = substr($_[-1], 0, 7); 

my $cmd;
my $status;

## Project the input file to remove common-mode covariance
## Note that the SCIGN combination increases JPL sigmas by 2.4
my $project_file = $bname.".project";
if ($opt_noproject) {
    # Do not remove constraints
    $cmd = sprintf("project -i %s -o %s -e %.2f",
                   $input_file, $project_file, $opt_escale);
}
else {
    $cmd = sprintf("project -i %s -o %s -r -t -s -e %.2f",
                   $input_file, $project_file, $opt_escale);
}
print $cmd, "\n";
$status = system "$cmd > /dev/null";
if ($status) {
    $status >>= 8;
    die "$0: $cmd failed with exit status $status\n";
}

## Apply the linear velocity model to the frame file to bring to the
## current epoch. 
my $frame_file = $bname.".frame";
#$cmd = "stamrg -i $ref_file -o $frame_file -v $epoch -verb";
$cmd = "posmrg -i $ref_file -o $frame_file -v $epoch -verb";
print $cmd, "\n";
$status = system "$cmd >/dev/null";
if ($status) {
    $status >>= 8;
    die "$0: $cmd failed with exit status $status\n";
}

## Iterate to find best solution
my $old_chisq_reduced;
my $iteration = 0;
my $trans_file = $bname.".transform";
my %sites;
my %original_sites;
print "Excluded sites with normalized residuals:\n";
print "Iter   chisq    dof chisq/dof Site     rN      rE      rU    rsq\n";
while (1) {
    $iteration++;

    ## Determine the transformation
    $cmd = "transform".
        " -f $frame_file".
        " -i $project_file".
        " -o $trans_file".
        " -r".                 # Apply estimated rotation to output
        " -t".                 # Apply estimated translation to output
        " -s".                 # Apply estimated scale to output
        " -verb";              # Append frame identification to output
    if ($opt_nc) {
        $cmd .= " -nc";
    }
    $status = system "$cmd > xfile.$$";
    if ($status) {
        $status >>= 8;
        die "$0: $cmd failed with exit status $status\n";
    }
    if ($opt_verbose) {
        my $fname = sprintf("%s/xfile.%02d", $cwd, $iteration);
        copy "xfile.$$", $fname;
    }

    ## Open the new X file and read the parameters
    my %param;
    open X, "< xfile.$$" or die "xfile.$$: $!";
    while (<X>) {
        s/^\s+//;
        last if /^$/;
        @_ = split;
        $param{$_[0]} = $_[2];
    }
    while (<X>) {
        last if /^ NAME     N/;
    }

    ## Identify the site with the worst normalized 3d residual
    my $worst_site;
    my $worst_rsq = 0;
    while (<X>) {
        last if /^ WRMS/;
        chomp;
        s/^.//;
        @_ = split;
        my $site = $_[0];
        if ($iteration == 1) {
            $original_sites{$site} = 1;
            $sites{$site} = {};
        }
        my @d    = @_[1..3];    # Residuals
        my @sd   = @_[4..6];    # Standard errors

        # For each site compute the sum of the squared normalized residuals
        # This would be distributed as chi-square with 3 degrees of freedom
        # if each site coordinate was independent and normally distributed.
        # In practice the coordinates for a site are not independent, but
        # that is ignored here.
        my $rsumsq = 0;
        foreach my $i (0..2) {
            $sites{$site}{$i} = 0;
            # Handle overflows
            if ($d[$i] =~ /\*/) {
                $rsumsq = 999999;
                last;
            }
            my $r = $d[$i]/$sd[$i];
            $sites{$site}{$i} = $r;
            $rsumsq += $r*$r;
        }
        $sites{$site}{RSQ} = $rsumsq;
        if ($rsumsq > $worst_rsq) {
            $worst_site = $site;
            $worst_rsq = $rsumsq;
        }
    }
    close X;

    ## Report result of this iteration
    my $chisq_reduced = $param{'CHI^2'}/$param{DOF};
    printf("%2d:  %.3e  %3d  %5.2f  ",
           $iteration, $param{'CHI^2'}, $param{DOF}, $chisq_reduced);

    ## Test for convergence
    if ($worst_rsq < $opt_threshold) {
        print "  Converged by RSQ\n";
        last;
    }

    ## Remove largest outlier for next iteration
    #printf "%s  rsq = %.2f\n", $worst_site, $worst_rsq;
    printf "  %s", $worst_site;
    foreach my $i (0..2) {
        printf "  %6.2f", $sites{$worst_site}{$i};
    }
    printf "  %6.2f\n", $sites{$worst_site}{RSQ};
    $cmd = "rmsta $worst_site $project_file $project_file >/dev/null";
    $status = system "$cmd";
    if ($status) {
        $status >>= 8;
        die "$0: $cmd failed with exit status $status\n";
    }
    delete $sites{$worst_site};
    $original_sites{$worst_site} = 0;
}

if ($opt_apply) {
    ## Align input stacov to the frame using the new X-file
    $cmd = "apply -x xfile.$$ -i $input_file -o fix.stacov -r -t";
    if ($apply_scale_in_diff) {
        print "Applying scale in diff\n";
        $cmd .= " -s";
    }
    else {
        print "Not applying scale in diff\n";
    }
    print $cmd, "\n";
    $status = system "$cmd >/dev/null 2>&1";
    if ($status) {
        print "$cmd FAILED\n";
    }

    ## Report differences between aligned stacov and frame
    my @de_list;
    my @dn_list;
    my @du_list;
    $cmd = "station_diff $frame_file fix.stacov";
    open DIFF, "$cmd |" or die "$cmd: $!";
    while (<DIFF>) {
        last if /^STA /;
    }
    print "Site  E(cm)  N(cm)  U(cm)\n";
    while (<DIFF>) {
        next if /^\s/;  # Skip page breaks and other blank lines
        my ($site, undef, undef, undef, $de, $dn, $du) = split;
        push @de_list, $de;
        push @dn_list, $dn;
        push @du_list, $du;
        my $x;
        if ($sites{$site}) {
            $x = ' ';
        }
        else {
            $x = 'x';
        }
        printf("%s  %5.1f  %5.1f  %5.1f %s\n", $site, $de, $dn, $du, $x);
    }
    printf("Median difference (cm) E %6.2f  N %6.2f  U %6.2f\n",
           median(@de_list), median(@dn_list), median(@du_list));
}

unlink $frame_file;
unlink $project_file;
unlink $trans_file;

chdir $cwd;
copy $tempdir.'/'."xfile.$$", $output_file;
copy "$tempdir/fix.stacov" , $cwd if $opt_apply;
system "rm -rf $tempdir";

# Auxiliary output files for testing and metrics
if ($opt_metrics) {
    open OUT, "> xsites" or die "xsites: $!";
    foreach my $site (sort keys %original_sites) {
        print OUT $site, '  ', $original_sites{$site}, "\n";
    }
    close OUT;
}

exit 0;

sub numerically {$a <=> $b}

sub median {
    my @list = sort numerically @_;
    my $n = scalar @list;
    my $mid = $n/2;
    my $median;
    if ($n % 2 == 0) {
        $median = 0.5 * ($list[$mid-1] + $list[$mid]);
    }
    else {
        $median = $list[$mid];
    }
    return $median;
}


sub usage {
    die "usage: $0 -f frame.stacov -i input.stacov -o xfile\n";
}

__END__

=head1 NAME

stacov2x  - Calculate 7-parameter Helmert transformation from stacov files

=head1 SYNOPSIS

    stacov2x -f frame.stacov -i input.stacov -o xfile 
             [-e escale] [-t threshold] [-nc] [-noproject]
             [-metrics] [-v]

    stacov2x [-H|-h|-help] [-man]

=head1 DESCRIPTION

This program uses the GIPSY components
project, stamrg, transform, and rmsta to create an
X-file containing the
7 parameter Helmert transformation from the frame defined by the input stacov
file to the reference frame realization defined by another stacov file.
Outlier sites are removed one by one until the largest squared normalized 3-d
residual is below the specified threshold.

The algorithm:

=over 4

Remove common covariance from input using project

Move frame to epoch using stamrg

Repeat:

      Create X-file using transform

      Remove largest outlier

Until largest outlier below threshold

=back

=head1 OPTIONS

=over 8

=item B<-f frame.stacov>

Required. The path to a stacov file that defines the reference frame
realization to be used.

=item B<-i input.stacov>

Required. The path to the stacov file to be transformed.

=item B<-o xfile>

Required. The path for the output X-file. This file contains the 7-parameter
transformation as well as additional statistical information.

=item B<-e escale>

Optional. Set the uncertainty scaling factor in project. The default scaling
is 2.4 times.

=item B<-t threshold>

Optional. Set the outlier removal threshold. The cut is applied to the sum
of the squared ENU residuals reported by transform, each normalized by its
reported standard deviation. The default threshold is set to 7.8, which is the
95% confidence limit for a Chi-square distribution with three degrees of
freedom.

=item B<-nc>

Optional. Use diagonal covariance terms only when creating the X-file, by 
using the -nc option in transform. This option should be used only when the
frame file has been created from individual site time series, e.g.,
jpl00.stacov, and not when it
contains the full correlation matrix, e.g., if derived from an IGS SINEX 
file.  

=item B<-noproject>

Optional. Do not project input stacov before alignment to the frame.

=item B<-apply>

Optional. Apply the new X-file to the input stacov, not including the scale,
and report the median E, N, U residuals. Leave the transformed stacov file
in place. 

=item B<-metrics>

Optional. Writes a list of input sites to the file xsites, with second field
set to 1 id the site was used in the final alignment, and 0 if it was excluded.

=item B<-v>

Optional. Enable verbose reporting, and save the X-file from each iteration.

=back

=head1 AUTHOR

Brian Newport

=cut
