#!/usr/bin/perl

# calculates statistics on columns of numbers
#
# input: columns of numbers, separated by whitespace
# output: columns of numbers, tab separated, with optional label at end
#
# Each row of output is a statistical function calculated across each
# corresponding column of input, for every row.  Each different statistic
# is output on its own line.  Allows for number of columns per
# line/row to vary.  The "rows" statistic always has just one number column. 
#
# For average, the counter is global per input line.  So if some
# columns are always countable and others are not, the average may not
# be what you expect.
#
# Available statistics are:
#
# -c --count			count of non-blank input rows (like "wc -l", minus blank lines)
# -a --avg --average		average
# -m --minmax --min --max	minimum and maximum
# -s --sum			sum total (addition)
#
# If no particular statistic(s) are specified, reports all of them.
#
# Modifiers:
# 
# -f --float --places		Include X decimal places in output.
#				If -f is given without number, assume 2.
#				If -f is not given, assume 0.
# -l --labelless --labeless	Suppress the labels.
#

# ----------------------------------------------------------------------
# init

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);
use Getopt::Long;

# Running per-column accumulators for each statistic.
# The array index is the (0-based) input column number; an element stays
# undefined until a numeric value has been seen in that column.
my (@min, @max, @sum);

# count of non-empty rows/lines seen in input
my $rows = 0;

# Allow single-letter options to be bundled (e.g. "-as" for "-a -s").
Getopt::Long::Configure ("bundling");

my $opt_count;	# report count of rows/lines read?
my $opt_avg;	# report average value?
my $opt_minmax;	# report min and max value ?
my $opt_sum;	# report sum total of all values?
my $opt_label;	# suppress labels in output?
my $opt_places;	# number of decimal places in output numbers

# "places:2" declares an optional integer value: when -f is given without
# a number, Getopt::Long stores 2.
GetOptions(
	'a|avg|average'		=>	\$opt_avg,
	'c|count'		=>	\$opt_count,
	'm|minmax|min|max'	=>	\$opt_minmax,
	's|sum'			=>	\$opt_sum,
	'f|float|places:2'	=>	\$opt_places,
	'l|labelless|labeless'	=>	\$opt_label,
	) or die ("command line parse error");

# if nothing was specified, default to all
if (not ($opt_minmax or $opt_sum or $opt_avg or $opt_count)) {
	$opt_minmax = $opt_sum = $opt_avg = $opt_count = 1;
	}

# -f not given at all: whole numbers only
$opt_places = 0 if not defined $opt_places;

# Getopt::Long accepts signed integers here, but a negative count would
# produce a malformed printf precision ("%.-1f") when the output format
# is built, so reject it up front.
die ("number of decimal places must not be negative") if $opt_places < 0;

# ----------------------------------------------------------------------
# input

# Read every input line (from the files named on the command line, or
# STDIN when there are none), split it on whitespace, and fold each
# numeric field into the per-column sum/min/max accumulators.
ROW: while (my $line = <>) {

	my @fields = split ' ', $line;

	# lines without any fields are ignored and do not count as rows
	next ROW unless @fields;

	$rows++;

	COLUMN: for my $col (0 .. $#fields) {

		my $value = $fields[$col];

		# non-numeric fields leave this column's accumulators untouched
		next COLUMN unless looks_like_number($value);

		# the first numeric value seeds the sum; later ones are added
		$sum[$col] = defined $sum[$col] ? $sum[$col] + $value : $value;

		if (!defined $max[$col] || $value > $max[$col]) {
			$max[$col] = $value;
		}

		if (!defined $min[$col] || $value < $min[$col]) {
			$min[$col] = $value;
		}

	} # COLUMN

} # ROW

# ----------------------------------------------------------------------
# output

# printf format for a single value:
# any number of digits to the left of the decimal point,
# $opt_places digits to the right of it, followed by a tab
my $fmt = "%.${opt_places}f\t";

# Prints one statistic as a single line: every value formatted with $fmt,
# then the label unless labels are suppressed, then a newline.
# Columns that never held a number are undefined and are printed as zero.
my $print_stat = sub {
	my ($label, @values) = @_;
	printf $fmt, $_ // 0 for @values;
	print $label unless $opt_label;
	print "\n";
};

if ($opt_minmax) {
	$print_stat->("(MIN)", @min);
	$print_stat->("(MAX)", @max);
}

# The divisor is the total row count, not a per-column count of numbers.
# With no rows @sum is empty, so the division is never evaluated.
if ($opt_avg) {
	$print_stat->("(AVG)", map { ($_ // 0) / $rows } @sum);
}

if ($opt_sum) {
	$print_stat->("(SUM)", @sum);
}

# The row count is a single plain integer and does not go through $fmt.
if ($opt_count) {
	print $rows, ($opt_label ? '' : "\t(ROWS)"), "\n";
}

# ----------------------------------------------------------------------

