#!/usr/bin/perl -w
use strict;
use warnings;
use IO::File;
use File::Path;
use File::Basename;
use MIME::Base64;
# Extract every resource embedded in a Safari .webarchive into a directory.
#
# Usage: SCRIPT WEBARCHIVE [OUTPUT DIRECTORY]
#
# The archive is converted to an XML property list with plutil(1); each
# resource dictionary is then located with regular expressions, its base64
# payload decoded, and the result written below the output directory under
# a path derived from the resource URL.

# Command-line handling: the archive is mandatory, the output directory
# defaults to the current directory.
my $archive = $ARGV[0];
die "Use: $0 WEBARCHIVE [OUTPUT DIRECTORY]\n"
    unless defined $archive && length $archive;
my $directory = $ARGV[1] || ".";

# Sanity check: the target must be an existing, writable directory.
die "$0: unable to write to output directory '$directory'\n"
    unless -d $directory && -w _;

# Convert the (binary) webarchive to an XML property list and capture it.
# The list form of open() bypasses the shell, so quotes or other
# metacharacters in the archive name cannot alter the command; '--' keeps a
# name starting with '-' from being parsed as an option.
open(my $plist, '-|',
    'plutil', '-convert', 'xml1', '-o', '/dev/stdout', '--', $archive)
    or die "$0: unable to run plutil: $!\n";
my $data = do { local $/; <$plist> };
$data = '' unless defined $data;
# For a pipe, close() reports the exit status of the child process.
close $plist
    or die "$0: plutil failed to convert '$archive'\n";

# Retrieve the resource dictionaries from the property list.
# NOTE(review): the patterns below assume plutil's xml1 layout, i.e.
# <key>NAME</key> followed by a <data> or <string> element - confirm
# against a real archive.
my @resources = $data
    =~ m{<dict>\s*<key>WebResourceData</key>\s*<data>.*?</dict>}gs;

# Process each resource.
my $basehref;
foreach my $resource (@resources)
{
    my ($encodedData) = $resource =~ m{<data>(.*?)</data>}s;
    my ($mimeType) = $resource
        =~ m{<key>WebResourceMIMEType</key>\s*<string>(.*?)</string>}s;
    my ($resourceURL) = $resource
        =~ m{<key>WebResourceURL</key>\s*<string>(.*?)</string>}s;

    # Without a URL there is nothing to derive a file name from.
    unless (defined $resourceURL && length $resourceURL)
    {
        print STDERR "skipping resource without WebResourceURL\n";
        next;
    }
    $encodedData = '' unless defined $encodedData;
    $mimeType = 'unknown' unless defined $mimeType;

    # An HTML resource becomes the base against which the URLs of the
    # following resources are made relative.
    $basehref = $resourceURL if "text/html" eq $mimeType;

    # Determine the path relative to the base URL. \Q...\E makes the base
    # match literally instead of being interpreted as a regex.
    my $path;
    if (defined $basehref && length $basehref)
    {
        ($path) = $resourceURL =~ m{\A\Q$basehref\E(.*)}s;
    }
    # Fall back to the last URL segment when there is no base or the URL
    # lies outside of it; otherwise such resources would all be written
    # to index.html.
    ($path) = $resourceURL =~ m{\A.*/(.*)}s unless defined $path;
    $path = '' unless defined $path;

    # Sanitize: drop empty, '.' and '..' segments so the file cannot end up
    # outside the output directory; a directory-like or empty path gets
    # 'index.html' as its file name.
    my $wants_index = $path eq '' || $path =~ m{/\z};
    my @segments = grep { length($_) && $_ ne '.' && $_ ne '..' }
        split m{/}, $path;
    push @segments, "index.html" if $wants_index || !@segments;
    $path = join '/', @segments;

    # Determine the full path and create its parent directory. The legacy
    # mkpath() interface is (path, verbose, mode) and croaks on failure.
    my $fullpath = "$directory/$path";
    my $parent = dirname($fullpath);
    unless (eval { mkpath($parent, 0, 0755); 1 })
    {
        print STDERR "error creating $parent: $@";
        next;
    }

    # Export the decoded payload as raw bytes.
    my $out;
    unless (open $out, '>', $fullpath)
    {
        print STDERR "error opening $fullpath: $!\n";
        next;
    }
    binmode $out;
    my $written = print {$out} decode_base64($encodedData);
    # Buffered write errors only surface at close(), so check both.
    unless (close($out) && $written)
    {
        print STDERR "error writing $fullpath: $!\n";
        next;
    }
    printf STDERR "%-30s: wrote file of type %s\n", $path, $mimeType;
}