#!/usr/bin/perl

# NAME
#
# duftp - analyze and report disk usage on an FTP server
#
# SYNOPSIS
#
# duftp [-h] [-s] [-c] [-v] [-u USER] [-p PASS] <HOST> [DIRECTORY]...
# 
# USAGE
#
# Give FTP server hostname as first regular argument
# Give directory(ies) to search as remaining regular arguments
# If username and password not given, assumes anonymous FTP
# Written for *nix but may work on other platforms
#
# OPTIONS
#
# -h	reports using human-friendly scale and suffix
# -s	summarize each argument (do not output size of each subdir)
# -c	print a grand total of all arguments 
# -u	specifies username for login (defaults to your username)
# -p	specifices password for login
#
# CAVEATS
#
# * Hard links will likely confuse things
# * Assumes the FTP server provides Unix-style directories
# * The directory parser is stupid and fragile
# 
# HISTORY
#
# 2010MAY01: Initial release
#
# LEGAL
#
# Written by Ben Scott <mailvortex@gmail.com>
# Free to use/change/distribute in any way for any purpose
# No warranty of any kind -- use at your own risk

##############################################################################
# includes and pragmas

use strict;
use warnings;
use Getopt::Long;
use Sys::Hostname;
use File::Basename;
use Net::Domain qw(hostfqdn hostdomain domainname);
use Net::FTP;

##############################################################################
# constants

# maximum width of status messages
# we assume
# - all terminals are at least 80 characters wide, so 80 is safe
# - last column may wrap, so 79 is safer
# this is a kludge, but I didn't want to muck around with terminal control
use constant MAX_WIDTH => 79;

# binary multiples
use constant {
        TiB => 1099511627776,
        GiB => 1073741824,
        MiB => 1048576,
        KiB => 1024,
}; # binary multiples

##############################################################################
# function prototypes

sub main();
sub process_args();
sub setup_ftp($$$);
sub process_dirs($);
sub size_of_dir($);
sub process_dirent($$);
sub human_size($);
sub print_size($$);
sub anon_passwd();
sub status($);
sub output($);
sub erase_line();
sub display_status();
sub diemsg($);
sub wmsg($);
sub dmsg($);

##############################################################################
# global variables

my $progname;		# program name
my $status_msg = '';	# status message, updated/displayed independently
my $ftp;		# Net::FTP object, used everywhere

# options
my $opt_summarize;	# output only size of each top-level argument
my $opt_total;		# output total of all args
my $opt_human;		# output human-friendly sizes
my $opt_verbose;	# display status messages
my $opt_debug;		# display debugging messages

##############################################################################
# main program

main;

##############################################################################
# subroutines

#-----------------------------------------------------------------------------
sub main () {
# as sub to isolate variables

# local variables
my $hostname;	# server to connect to
my $username;	# username to login as
my $password;	# password to login with
my $dirs;	# ref to list of directories to process

# get program name from file name
dmsg "\$0=$0";
$progname = basename($0);
dmsg "progname=$progname";

# get command line
($hostname, $username, $password, $dirs) = process_args;

setup_ftp($hostname, $username, $password);

process_dirs ($dirs);

status("Disconnecting");
$ftp->quit;

erase_line; # erase any remaining status message

dmsg 'end program';

} # main

#-----------------------------------------------------------------------------
sub process_args () {
# get arguments and options from command line
# gives: hostname, username, password, ref to list of dirs

# local variables
my $username;
my $password;
my $hostname;
my $GetOptions_result;
my $local_username;
my $local_hostname;

$GetOptions_result = GetOptions(
	'u|user|username=s'        => \$username,
	'p|pass|passwd|password=s' => \$password,
	'h|human|human-readable'   => \$opt_human,
	'c|total'     => \$opt_total,
	's|summarize' => \$opt_summarize,
	'v|verbose'   => \$opt_verbose,
	'd|debug'     => \$opt_debug,
	);

diemsg 'command line parse error' unless $GetOptions_result;

# no username or password given, assume anonymous
if ((not defined $username) and (not defined $password)) {
	$username = 'anonymous';
	$password = anon_passwd;
	}
	
if ((defined $username) and (not defined $password)) {
	diemsg 'username given without password';
	}

# password given without username; assume local username
if ((defined $password) and (not defined $username)) {
	dmsg 'attempting to get default username';
	$username = getlogin;
	diemsg 'username not specified and getlogin failed'
		if not defined $username;
	}

# get hostname
diemsg 'specify hostname as first argument' if ($#ARGV < 0);
$hostname = shift @ARGV;

# check directories
diemsg 'specify one or more directories as remaining arguments' if ($#ARGV < 0);

dmsg "hostname=<$hostname>";
dmsg "username=<$username>";
dmsg "password=<$password>";
dmsg $#ARGV+1 . ' directory argument(s)';
dmsg "option enabled: summarize" if $opt_summarize;
dmsg "option enabled: total" if $opt_total;
dmsg "option enabled: human" if $opt_human;
dmsg "option enabled: verbose" if $opt_verbose;

return ($hostname, $username, $password, \@ARGV);

} # process_args

#-----------------------------------------------------------------------------
sub setup_ftp($$$) {
# connects to and logs in to FTP server
# takes: host, username, password
# no return

# arguments
my $hostname = shift;
my $username = shift;
my $password = shift;

status("Connecting");
$ftp = Net::FTP->new($hostname)
	or diemsg "cannot open $hostname: $@\n";

status("Logging in");
$ftp->login($username, $password)
	or diemsg "${username}\@${hostname}: ${\($ftp->message)}";

} # setup_ftp

#-----------------------------------------------------------------------------
sub process_dirs($) {
# process each directory name given to us
# takes: Net::FTP object, ref to dir list
# no return

# arguments
my $dirs = shift;
my @dirs = @$dirs;

# local variables
my $size;
my $total = 0;

foreach my $dir (@dirs) {
	dmsg "------------------------------ top-level dir";
	dmsg "dir=<$dir>";
	$size = size_of_dir($dir);
	# always print total for a top-level directory argument
	print_size ($dir, $size);
	$total += $size;
	}

print_size('total', $total) if $opt_total;

} # process_dirs

#-----------------------------------------------------------------------------
sub size_of_dir($) {
# walks directory tree and returns size
# takes: FTP object, directory path
# output: none direct, but process_dirent may output
# gives: size of files in dir and all subdirs

# FIXME some FTP servers implement platform-independent format, should use that

# arguments
my $dir = shift;

# local variables
my @dir;	# FTP directory entries returned by Net::FTP
my $dirent;	# a single FTP directory entity
my $total = 0;	# total accumulated size

dir_status($dir);
dmsg "-------------------- size_of_dirs()";
dmsg "dir=<$dir>";

# get directory
@dir = $ftp->dir($dir);

# if we did not get directory, warn and return a size of zero
if (not @dir) {
	my $msg = $ftp->message;
	chomp($msg); # remove any trailing newline
	$msg =~ s{\n}{|}; # replace any embedded newlines with pipe
	wmsg "$dir: could not get directory: $msg";
	return 0;
}

# for each directory entry
foreach $dirent (@dir) {
	$total += process_dirent($dir, $dirent);
	}

# return accumulated size
return $total;

} # sub size_of_dir

#-----------------------------------------------------------------------------
sub process_dirent($$) {
# helper for size_of_dir
# takes: directory path name, dirent (directory entry)
# output: size of subdirectory, if dirent is subdir, unless $opt_summarize
# gives: size of directory entry
# may call size_of_dir recursively

# arguments
my $dir = shift;
my $dirent = shift;

# local variables
my $subdir;	# subdir path
# fields of an FTP directory entry
my $mode;	# file mode in trwxrwxrwx format
my $links;	# link count
my $uowner;	# user owner
my $gowner;	# group owner
my $size;	# size
my $month;	# month
my $day;	# date
my $year_or_time;	# either the year or the time
my $name;	# name (assumed to be all remaining characters)
# fields derived from a dir ent
my $type;	# single character file type (from mode)

# debugging separator header
dmsg "---------- process_dirent() ";
dmsg "dirent=<$dirent>";

# some FTP servers give a "total" line, which we skip
if ($dirent =~ m/^total/) {
	dmsg 'skipping "total" line';
	return 0;
	}

# split dirent into component fields
# FIXME very sensitive to differences in directory format
($mode, $links, $uowner, $gowner, $size, $month, $day, $year_or_time, $name) =
	split /\s+/, $dirent;

# make sure we got all fields
if ((not defined $mode) or (not defined $mode) or (not defined $links) or
	(not defined $uowner) or (not defined $gowner) or
	(not defined $size) or (not defined $month) or 
	(not defined $day) or (not defined $year_or_time) or
	(not defined $name))
	{
		wmsg "could not parse directory entry: $dirent";
		return 0;
		}

# dump fields (if debugging)
dmsg "name=<$name>";
dmsg "size=<$size>";
dmsg "mode=<$mode>";
dmsg "links=<$links>";
dmsg "uowner=<$uowner>";
dmsg "gowner=<$gowner>";
dmsg "year_or_time=<$year_or_time>";
dmsg "month=<$month>";
dmsg "day=<$day>";

# get entry type as first character of mode
($type, undef) = split (//, $mode, 2);
dmsg "type=<$type>";

# decide what to do based on entry type
if ($type eq '-') {
	# ordinary file
	# do nothing else
	}
elsif ($type eq 'd') {
	# directory (hopefully, a subdirectory)
	# just in case we ever see special dir entries
	if (($name eq '.') or ($name eq '..')) {
		$size = 0;
		}
	else {
		$subdir = $dir . '/' . $name;
		dmsg ">>> process_dirent call size_of_dir($subdir)";
		$size = size_of_dir($subdir);
		dmsg "<<< process_dirent from size_of_dir($subdir)";
		print_size($subdir, $size) unless $opt_summarize;
		}
	}
elsif ($type eq 'l') {
	# symbolic link
	dmsg "symlink";
	$size = 0;
	}
else {
	# something we didn't plan for
	wmsg "unexpected dir entry type: <$type>";
	$size = 0;
	}

dmsg "returning size=<$size>";
return $size;

} # process_dirent

#-----------------------------------------------------------------------------
sub print_size ($$) {
# takes: dir name, size
# outputs the formatted size and directory name

# arguments
my $dir  = shift;
my $size = shift;

$size = human_size($size) if $opt_human;

output "$size\t$dir";

} # print_size


#-----------------------------------------------------------------------------
sub anon_passwd() {
# returns a string to use as password for anonymous FTP

my $username;
my $hostname;
my $password;

# try to get hostname as FQDN	
$hostname = hostfqdn;
# if that failed, try plain hostname
$hostname = hostname if not defined $hostname;
# if that failed, lie
$hostname = 'example.com' if not defined $hostname;
dmsg "local hostname=<$hostname>";

# try to get username
$username = getlogin;
# if that failed, lie
$username = 'anonuser' if (not defined $username);
dmsg "local username=<$username>";

# build password
$password = $username . '@' . $hostname;
dmsg "anon password=<$password>";

return $password;

} # anon_passwd

#-----------------------------------------------------------------------------
sub human_size ($) {
# takes: size number
# gives: size with human-friendly suffix and scale (usually a string)

my $size = shift;

dmsg "human_size() input size=<$size>";
return sprintf("%.2fT", ($size / TiB)) if ($size > TiB);
return sprintf("%.2fG", ($size / GiB)) if ($size > GiB);
return sprintf("%.2fM", ($size / MiB)) if ($size > MiB);
return sprintf("%.2fK", ($size / KiB)) if ($size > KiB);

} # human_size

#-----------------------------------------------------------------------------
sub output ($) {
# displays regular output for the user
# handles clearing/redisplaying status information

# arguments
my $msg = shift;

erase_line; # clear any status message
print $msg, "\n";
display_status; # display any status message

} # output

#-----------------------------------------------------------------------------
sub status($) {
# updates status message

$status_msg = shift;
# truncate to max width, if needed
$status_msg = substr ($status_msg, 0, MAX_WIDTH);
erase_line;
display_status;

} # status

#-----------------------------------------------------------------------------
sub dir_status ($) {
# updates status message for directory processing
# no return

return unless $opt_verbose; # shortcut

# arguments
my $dir = shift;

# local "constants"
my $msg = 'Processing directory: ';

# truncate dir (if needed) to keep overall status msg within limits
$dir = substr($dir, 0, (MAX_WIDTH - length($msg)) );
status($msg . $dir);

} # dir_status

#-----------------------------------------------------------------------------
sub erase_line() {
# if verbose, erases line and puts cursor at start of line for status msg

return unless $opt_verbose;

# this is a kludge, but I didn't want to muck around with terminal control
print STDERR "\r"; # go to start of line
print STDERR ' ' x MAX_WIDTH; # overwrite with spaces
print STDERR "\r"; # go to start of line

} # erase_line

#-----------------------------------------------------------------------------
sub display_status() {
# displays current status message

return unless $opt_verbose;
print STDERR $status_msg; # no newline

} # display_status

#-----------------------------------------------------------------------------
sub diemsg ($) {

printf STDERR "\n" if $opt_verbose; # in case status message present
die "$progname: $_[0]\n";

}

#-----------------------------------------------------------------------------
sub wmsg ($) {
# warnng message

my $msg = shift;

erase_line;
warn "$msg\n";
display_status;

} # wmsg

#-----------------------------------------------------------------------------
sub dmsg($) {
# debug message

my $msg = shift;

return unless $opt_debug;
erase_line;
warn "$progname: DEBUG: $msg\n";
display_status;

} # dmsg

##############################################################################
# EOF duftp.pl


