#!/usr/bin/perl -w
# durep - Disk Usage Report Generator #
# #
# Copyright (C) 2004 Damian Kramer ( #
# #
# You may distribute this program under the terms of the Artistic License. #
# #
# This program is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# Artistic License for more details. #
use Getopt::Long;
use File::Basename;
use MLDBM qw(DB_File Storable);
use Fcntl;
use Sys::Hostname;
use POSIX;
use Cwd qw(cwd);
use strict;
# ---------------------------------------------------------------------------
# Package globals shared between the main program and the subroutines below.
# ---------------------------------------------------------------------------
our ($version, %options);
our ($root_node, $filesystem_id, @stats);
our ($opt_help, $opt_version, $opt_textdepth, $opt_hidesize, $opt_showdate, $opt_nosort, $opt_quiet);
our ($opt_savefile, $opt_loadfile, $opt_desc, $opt_collate);
our ($opt_files, $opt_onefilesystem, $opt_collapsepath, $opt_excludepath, $opt_coalescefiles);
our ($root_dir, $file_count, $dir_count, $next_id);

# Values stored in a node's TYPE field.  They must be declared because the
# file runs under 'use strict'.  printDir tests $TYPE_DIR with a bitwise AND,
# so the values are treated as bit flags.
# NOTE(review): this file never assigned $TYPE_FILE or $TYPE_COLLAPSED even
# though recursiveScan uses them; 0 and 8 are assumed here -- confirm against
# any other program that reads saved .ds files.
our ($TYPE_FILE, $TYPE_DIR, $TYPE_COLLAPSED);
$TYPE_FILE      = 0;
$TYPE_DIR       = 1;
$TYPE_COLLAPSED = 8;

$version = "0.9";

# Unbuffered output.
$| = 1;

# Ctrl-C goes through catchError so a tied save/load file gets untied.
$SIG{INT} = \&catchError;

# Getopt::Long specification: option spec => variable receiving the value.
%options = (
    "h|help"              => \$opt_help,
    "v|version"           => \$opt_version,
    "td|text-depth=i"     => \$opt_textdepth,
    "hs|hide-size=s"      => \$opt_hidesize,
    "sd|show-date"        => \$opt_showdate,
    "ns|nosort"           => \$opt_nosort,
    "q|quiet"             => \$opt_quiet,
    "sf|save-file=s"      => \$opt_savefile,
    "lf|load-file=s"      => \$opt_loadfile,
    "d|desc=s"            => \$opt_desc,
    "c|collate=s"         => \$opt_collate,
    "f|files"             => \$opt_files,
    "x|one-file-system"   => \$opt_onefilesystem,
    "cp|collapse-path=s"  => \$opt_collapsepath,
    "ep|exclude-path=s"   => \$opt_excludepath,
    "cf|coalesce-files=s" => \$opt_coalescefiles,
);

# Show the usage text when the command line cannot be parsed or help is
# requested.
usage() unless GetOptions(%options);
usage() if defined $opt_help;

if (defined $opt_version) {
    print "durep version $version\n";
    exit 0;
}
## Process options

# --collate is a stand-alone mode: build the collated index, then stop.
if ($opt_collate) {
    collate();
    exit 0;
}

# Size options accept an optional b/k/m/g suffix; normalise them to bytes.
$opt_hidesize      = processSizeOption($opt_hidesize)      if defined $opt_hidesize;
$opt_coalescefiles = processSizeOption($opt_coalescefiles) if defined $opt_coalescefiles;

doAbort("Depth must be greater than 0.")
    if defined $opt_textdepth && $opt_textdepth < 1;
doAbort("You must specify a save file if you use --quiet.")
    if defined $opt_quiet && !defined $opt_savefile;
doAbort("You can't use --load-file and --save-file at the same time.")
    if defined $opt_loadfile && defined $opt_savefile;

# Save files always carry a .ds extension.
if ($opt_savefile) {
    $opt_savefile .= ".ds" unless $opt_savefile =~ /\.ds$/;
}

# Default to the current directory when none is given.
push @ARGV, "." unless @ARGV;
$root_dir = shift @ARGV;
chop $root_dir if $root_dir =~ m|./$|;    # Remove trailing /
doAbort("`$root_dir' not a valid directory.") unless -d $root_dir;

# Get the absolute pathname rather than relative pathname.  Change back to
# the starting directory afterwards so that relative --save-file and
# --load-file paths resolve against where the command was run.
my $start_dir = cwd();
chdir($root_dir) or doAbort("Unable to chdir to '$root_dir'.");
$root_dir = cwd();
chdir($start_dir) or doAbort("Unable to chdir to '$start_dir'.");
if ($opt_loadfile) {
    # Report from a previously saved scan instead of walking the disk.
    tie %{$root_node}, 'MLDBM', "$opt_loadfile", O_RDONLY, 0640
        or doAbort("Unable to tie file '$opt_loadfile'.");
    # The report below starts from node 1; refuse files that lack it.
    doAbort("'$opt_loadfile' does not contain a scan.")
        unless ref $root_node->{1} eq 'HASH';
}
else {
    ## Perform scan
    $file_count = 0;
    $dir_count  = 0;
    $next_id    = 1;
    $root_node  = {};

    if ($opt_savefile) {
        # Start from an empty database: remove any existing file first.
        if (-e $opt_savefile) {
            print "Removing existing savefile '$opt_savefile'.\n" unless $opt_quiet;
            unlink $opt_savefile
                or doAbort("Unable to remove existing savefile '$opt_savefile': $!");
        }
        tie %{$root_node}, 'MLDBM', "$opt_savefile", O_CREAT|O_RDWR, 0640
            or doAbort("Unable to tie file '$opt_savefile'.");
    }

    # Device number of the root; recursiveScan compares against it for -x.
    ($filesystem_id) = stat $root_dir if defined $opt_onefilesystem;

    # $next_id starts at 1, so the root node gets ID 1 and is stored there.
    $root_node->{1} = recursiveScan($root_dir, undef, 1);

    # Scan metadata, stored next to the nodes under the DATA key.
    my $data = {};
    $data->{FILE_COUNT}  = $file_count;
    $data->{DIR_COUNT}   = $dir_count;
    $data->{LAST_UPDATE} = time;
    $data->{HOSTNAME}    = hostname();
    $data->{DESC}        = $opt_desc if $opt_desc;
    $data->{OPTIONS}->{FILES}         = 1                  if $opt_files;
    $data->{OPTIONS}->{ONEFILESYSTEM} = 1                  if $opt_onefilesystem;
    $data->{OPTIONS}->{COLLAPSEPATH}  = $opt_collapsepath  if $opt_collapsepath;
    $data->{OPTIONS}->{EXCLUDEPATH}   = $opt_excludepath   if $opt_excludepath;
    $data->{OPTIONS}->{COALESCEFILES} = $opt_coalescefiles if $opt_coalescefiles;
    $root_node->{DATA} = $data;
}

# use Data::Dumper;
# print Dumper($root_node);

# Text report: a header line for the root, then the tree beneath it.
if (!$opt_quiet) {
    my $root = $root_node->{1};
    printf "[ %s %s (%d files, %d dirs) ]\n", $root->{NAME}, prettyFileSize($root->{SIZE}),
        $root->{FCOUNT}, $root->{DCOUNT};
    printDir($root, 0);
}

# Release the database if one was tied.
if ($opt_savefile || $opt_loadfile) {
    untie %{$root_node};
}

exit 0;
## End of program.
# Recursively scan a directory and build its node.
#
#   $dir    - path of the directory to scan
#   $parent - ID of the parent node, or undef for the root
#   $store  - true while entries found below should be recorded in
#             $root_node; cleared once a directory matches --collapse-path
#
# Returns a hash reference describing $dir (ID, NAME, SIZE, TYPE, DCOUNT,
# FCOUNT and, when present, PARENT, MTIME and CHILDREN).  The returned node
# is NOT stored in $root_node here; the caller does that.  Child nodes are
# stored in $root_node keyed by their ID while $store is true.
sub recursiveScan {
    my ($dir, $parent, $store) = @_;
    my @children;
    my $coalesced_count = 0;
    my $coalesced_size  = 0;

    my $node = {
        ID     => $next_id++,
        NAME   => defined $parent ? basename($dir) : $dir,
        SIZE   => 0,
        TYPE   => $TYPE_DIR,
        DCOUNT => 0,
        FCOUNT => 0,
    };
    $node->{PARENT} = $parent if defined $parent;

    # A directory matching --collapse-path is still reported itself, but
    # nothing beneath it is recorded.  The flag is OR-ed in so the directory
    # bit is kept.
    if ($store && $opt_collapsepath && $dir =~ m/$opt_collapsepath/) {
        $store = 0;
        $node->{TYPE} |= $TYPE_COLLAPSED;
    }

    my $dirhandle;
    opendir($dirhandle, $dir) or warn "Unable to open dir '$dir': $!\n" and return $node;

    # Read every name up front so the handle is released before recursing.
    my @names = readdir($dirhandle);
    closedir($dirhandle);

    foreach my $name (@names) {
        my $path = "$dir/$name";
        @stats = lstat $path or warn "Unable to lstat '$path': $!\n" and next;

        # The directory's own mtime comes from its '.' entry.
        $node->{MTIME} = $stats[9] if $name eq ".";

        # Skip '.' and '..'
        next if $name eq "." || $name eq "..";

        if (-d _ && !-l _ && !$opt_files) {
            if (!-x _) {
                warn "Unable to read directory `$path'. Skipping.\n";
                next;
            }
            next if $opt_excludepath && $path =~ m/$opt_excludepath/;
            next if $opt_onefilesystem && ($stats[0] != $filesystem_id);

            my $child = recursiveScan($path, $node->{ID}, $store);
            $dir_count++;
            $node->{SIZE}   += $child->{SIZE};
            $node->{FCOUNT} += $child->{FCOUNT};
            $node->{DCOUNT} += $child->{DCOUNT} + 1;
            if ($store) {
                $root_node->{ $child->{ID} } = $child;
                push @children, $child->{ID};
            }
            next;
        }

        # Everything else is accounted for as a file.
        $file_count++;
        $node->{FCOUNT}++;

        # Files below the --coalesce-files threshold are summed into a
        # single entry that is added after the loop.
        if ($opt_coalescefiles && $stats[7] < $opt_coalescefiles) {
            $coalesced_count++;
            $coalesced_size += $stats[7];
            next;
        }

        if ($store) {
            my $file = {
                ID     => $next_id++,
                NAME   => $name,
                SIZE   => $stats[7],
                PARENT => $node->{ID},
                TYPE   => $TYPE_FILE,
                MTIME  => $stats[9],
            };
            $root_node->{ $file->{ID} } = $file;
            push @children, $file->{ID};
        }
        $node->{SIZE} += $stats[7];
    }

    if ($coalesced_count) {
        if ($store) {
            my $file = {
                ID     => $next_id++,
                NAME   => "[COALESCED FILES]",
                SIZE   => $coalesced_size,
                PARENT => $node->{ID},
                TYPE   => $TYPE_FILE,
                MTIME  => 0,
                FCOUNT => $coalesced_count,
            };
            $root_node->{ $file->{ID} } = $file;
            push @children, $file->{ID};
        }
        $node->{SIZE} += $coalesced_size;
    }

    $node->{CHILDREN} = \@children if @children;

    # $root_node->{$node->{ID}} = $node;
    return $node;
}
# Build the collated index "$opt_collate/durep.cds" from every *.ds save
# file found in the directory $opt_collate.
#
# Takes no arguments.  For each save file (in sorted name order) one record
# is written under a sequential ID starting at 1.  The record is the file's
# DATA hash plus FILENAME, SIZE (root node SIZE), ID and PATH (root node
# NAME).  Aborts via doAbort when the directory or a database cannot be
# opened.
sub collate {
    doAbort("'$opt_collate' is not a valid directory.") unless -d $opt_collate;

    # Collect the save files first so the directory handle can be closed
    # before any database is touched.
    opendir(my $dirhandle, $opt_collate)
        or doAbort("Unable to open dir '$opt_collate': $!");
    my @files = sort grep { /\.ds$/ } readdir($dirhandle);
    closedir($dirhandle);

    my %db;
    tie %db, 'MLDBM', "$opt_collate/durep.cds", O_CREAT|O_RDWR, 0640
        or doAbort("Unable to tie file '$opt_collate/durep.cds'");

    # IDs are reassigned on every run; drop records left by an earlier
    # collation so none survive under IDs that are no longer in use.
    %db = ();

    my $id = 0;
    foreach my $file (@files) {
        my %temp;
        tie %temp, 'MLDBM', "$opt_collate/$file", O_RDONLY, 0640
            or doAbort("Unable to tie file '$opt_collate/$file'");

        # Tolerate a save file without a DATA record or root node.
        my %data = %{ $temp{DATA} || {} };
        my $root = $temp{1} || {};

        $id++;
        $data{FILENAME} = $file;
        $data{SIZE}     = $root->{SIZE};
        $data{ID}       = $id;
        $data{PATH}     = $root->{NAME};
        $db{$id} = \%data;

        untie %temp;
    }

    untie %db;
    return;
}
# Print the text report for one directory node, recursing into sub-directories.
#
#   $dir    - node hash reference whose CHILDREN (a list of node IDs) are
#             looked up in $root_node and printed
#   $indent - nesting depth; used for indentation and for the --text-depth
#             limit
#
# Entries smaller than --hide-size are omitted.  Unless --nosort is given,
# entries are listed largest first.  Each line shows the size, a 30-character
# bar, the percentage of the parent's size, optionally the date, and the name
# (with a trailing "/" for directories).
sub printDir {
    my ($dir, $indent) = @_;
    my @entries;

    # Look children up through a separate variable: assigning to the loop
    # variable itself would overwrite the IDs stored in CHILDREN.
    foreach my $child_id (@{ $dir->{CHILDREN} || [] }) {
        my $child = $root_node->{$child_id};
        next unless ref $child eq 'HASH';
        next if defined $opt_hidesize && $child->{SIZE} < $opt_hidesize;
        push @entries, $child;
    }

    @entries = reverse sort sortBySize @entries unless $opt_nosort;

    foreach my $entry (@entries) {
        my $percent = $dir->{SIZE} == 0 ? 0 : ($entry->{SIZE} / $dir->{SIZE}) * 100;

        # Length of the filled part of the 30-character bar.
        my $numofchars = int((30 / 100) * $percent);
        $numofchars = 30 if $numofchars > 30;

        # Entries without a TYPE are treated as plain files.
        my $is_dir = ($entry->{TYPE} || 0) & $TYPE_DIR;

        print " " x $indent;
        print prettyFileSize($entry->{SIZE});
        printf(" [%s%s] ", "#" x $numofchars, " " x (30 - $numofchars));
        printf("%6.2f%% ", $percent);
        printf("%s ", shortDate($entry->{MTIME})) if $opt_showdate;
        printf("%s%s\n", $entry->{NAME}, $is_dir ? "/" : "");

        if ($is_dir) {
            printDir($entry, $indent + 1)
                if !defined $opt_textdepth || ($opt_textdepth > $indent + 1);
        }
    }
    return;
}
# Convert a size argument such as "512", "10k", "1.5M" or "2g" to bytes.
#
#   $_[0] - a non-negative integer or decimal number, optionally followed by
#           one unit letter: b (bytes), k, m or g, in either case
#
# Returns the size in bytes; a bare number or a "b" suffix is returned
# unscaled.  Calls doAbort for a malformed argument.
sub processSizeOption {
    my ($arg) = @_;
    my ($size, $unit);

    if (defined $arg && $arg =~ m/^(.+)([bBkKmMgG])$/) {
        ($size, $unit) = ($1, $2);
    }
    else {
        $size = $arg;
    }

    unless (defined $size && $size =~ m/\A\d+(?:\.\d+)?\z/) {
        doAbort("Malformed argument: $_[0]");
    }

    return $size unless defined $unit;

    # Powers of 1024 for each accepted unit letter.
    my %multiplier_for = (
        b => 1,
        k => 1024,
        m => 1048576,
        g => 1048576 * 1024,
    );
    my $multiplier = $multiplier_for{ lc $unit };

    return $multiplier == 1 ? $size : $size * $multiplier;
}
# Format a byte count as a short, fixed-width string.
#
#   $_[0] - size in bytes
#
# The value is divided by 1024 while it is at least 1024, stepping through
# the suffixes K, M and G (it stops at G).  Byte values are printed as a
# whole number with a "b" suffix; scaled values get one decimal place.
sub prettyFileSize {
    my ($amount) = @_;
    my $suffix = "b";

    foreach my $larger_suffix ("K", "M", "G") {
        last unless $amount >= 1024;
        $amount /= 1024;
        $suffix = $larger_suffix;
    }

    return $suffix eq "b"
        ? sprintf("%6d%s", $amount, $suffix)
        : sprintf("%6.1f%s", $amount, $suffix);
}
# Format a timestamp for the report.
#
#   $_[0] - time in epoch seconds
#
# Timestamps more than 31536000 seconds (365 days) in the past are shown as
# month, day and year; newer ones as month, day and hour:minute.  Local time
# is used.
sub shortDate {
    my $cutoff = time - 31536000;
    my $format = $_[0] < $cutoff ? "%b %e %Y" : "%b %e %H:%M";
    return POSIX::strftime($format, localtime $_[0]);
}
# Comparator for sort(): orders node hashes by ascending SIZE.
# Reads the two elements from sort's package variables $a and $b.
sub sortBySize {
    my ($left_size, $right_size) = ($a->{SIZE}, $b->{SIZE});
    return $left_size <=> $right_size;
}
### End program with error message
#
#   $_[0] - text of the error; written to STDERR prefixed with "Error: "
#
# Does not return: the program exits with status 1.
sub doAbort {
    my $message = $_[0];
    warn "Error: $message\n";
    exit 1;
}
# Signal handler installed for SIGINT.
# Warns that the run was interrupted, unties $root_node when a save or load
# file is in use, and exits with status 1.
sub catchError {
    warn "Program interrupted.\n";

    my $database_in_use = $opt_savefile || $opt_loadfile;
    untie %{$root_node} if $database_in_use;

    exit 1;
}
# Print the command-line help to standard output and exit with status 0.
# Takes no arguments and does not return.
sub usage {
    print <<EOF;
Usage: durep [OPTION(S)] [DIRECTORY]
-h, --help this help
-v, --version show version number
Text Output Options:
-td, --text-depth=N limit text report on directories to depth N
-hs, --hide-size=N[bkmg] do not display entries using N Bytes/Kb/Mb/Gb
or less (default Bytes)
-sd, --show-date show modification date
-ns, --nosort do not sort results by size
-q, --quiet do not produce text output
File Options:
-sf, --save-file=<file> save the results of the scan into this file
-lf, --load-file=<file> load the results of a scan from this file
-d, --desc=<description> give description of save file
-c, --collate=<dir> collate save files in dir for web report
Inclusion Options:
-f, --files do not descend into subdirs, only report files
-x, --one-file-system do not traverse file systems
-cp, --collapse-path=<regexp> hide entries below paths that match regexp
-ep, --exclude-path=<regexp> ignore paths that match regexp
-cf, --coalesce-files=N[bkmg] coalesce files less than N Bytes/Kb/Mb/Gb
into one entry (default Bytes)
EOF
    exit 0;
}