#!/usr/bin/perl -w
use strict;

# $Id: mime-construct,v 1.10 2005/03/30 20:58:54 roderick Exp $
#
# Roderick Schertler <roderick@argon.org>

# Copyright (C) 1999 Roderick Schertler
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# For a copy of the GNU General Public License write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# XXX
#    - don't always load bodies into memory?
#    - continue long header lines I construct

use Proc::WaitStat qw(close_die);

(my $Me = $0) =~ s-.*/--;
my # new line required for makemaker
    $VERSION = '1.9';

my $Debug	= 0;
my $Exit	= 0;

my $Usage = <<EOF;
usage: $Me switch...

  Global settings:
    --debug		turn debugging on
    --help		show this and then die
    --output		don\'t mail, output to stdout (loses bcc info)
    --subpart		generate subpart rather than whole message (turns
    	    	    	on --output, changes other semantics a bit)
    --version		show the version number and exit (by itself only)

  Main header, can appear anywhere:
    --bcc address	add a bcc recipient
    --cc address	add a cc recipient
    --embedded-to	send to recipients already listed in the header
    --header x		add arbitrary primary headers
    --multipart x	specify multipart content type and options
    --prelude x		add to multipart prelude text
    --subject x		specify subject
    --to address	add a to recipient

  Per-part header, only affects next part output:
    --attachment name	specify attachment disposition with given file name
    --encoding x	specify encoding for next part
    --part-header x	add arbitrary headers for next part
    --type x		specify content type for next part

  Part output switches:
    --file path		output a part whose contents come from path
    --file-auto path	same, but also set the Content-Type from the
			path's extention (requires MIME::Types)
    --file-attach path	same, but also set --attachment to the file's base name
    --string str	output a part whose contents are str

  Subpart output switches:
    --subpart-file path
    --subpart-file-auto path
    --subpart-file-attach path
    --subpart-string str
    	Same as previous two, but path or str contains a subpart.  It can
    	contains headers, and you can't specify a --type or --encoding for
    	it here (they should be supplied when it is generated).  Normally
    	it will have been generated by $Me with the --subpart switch.

The default content type is text/plain.  The default encoding is chosen
based on the body.  The default multipart type is multipart/mixed.

See the man page or \`perldoc $Me\' for the full documentation.
EOF

# Alphabet for random boundary strings, used only if it looks like none
# of the words will work.  These must all be valid boundary chars, they
# aren't vetted.
my @Alphabet = ('a'..'z');

# Words for boundary strings.  These must all be composed of valid
# boundary chars, they aren't vetted.
my @Word = qw(
    amon-hen angband aragorn azog baggins balin balrog barad-dur
    beleriand beren bilbo bombadil boromir cirith-ungol deagol dirhavel
    dol-guldur doriath durin earendil ecthelion elbereth elrond fangorn
    faramir feanor finarfin frodo galadriel gamgee gandalf gollum gondor
    gorgoroth iluvatar imladris isengard isildur ithilien lothlorien
    luthien minas-morgul minas-tirith mordor morgoth moria nargothrond
    nauglamir nazgul numenor orthanc osgiliath palantir pengolodh
    rivendell rohirrim samwise saruman sauron silmaril thangorodrim
    theoden thranduil thror uruk-hai valar voronwe weathertop

    abalone aborigine addressee admonition aerodynamic affidavits
    algonquin animators apprentice astonishing ballyhoo baltimorean
    barometric blackbird brouhaha butterers calvinball chandeliers
    comparator computerizes conductance congratulations consecrate
    constellations continual controllability courtesan cranberry
    crusading culturally curtained dartmouth daugherty deleterious
    detestable disambiguations dragonhead envisages excursions explainer
    extension fluctuations freudianism greentree harbinger
    inadequateness indoctrination inexcusably interpersonal
    intramuscular irrational jealousies latinized liberators louisville
    loveliness lutheranizer luxurious malfunctioning manhattanize
    marionette marlinspike masterfully meistersinger monochromatic
    monograph multiplier mustiness nationalizing navigating negligent
    nicaragua normanizers parallelism passivate percentile perfuming
    pigmented plexiglas procrusteanizes protestantism pterodactyl
    purportedly quinoa rachmaninoff recalibrates reciprocates
    referentiality regeneration reminiscently repudiates resynchronizing
    revisiting rigorously sanderling satisfiable saxonizations
    scampering scarceness sclerotic smokestack steelmaker stegosaurus
    stipulating telegraphing thirtieth transparently tunafish
    unaesthetically unattainability uncoordinated uncountably underflow
    unrepresentable unthinkable ventricles whitehall
);

sub xwarndie_mess {
    my @mess = ("$Me: ", @_);
    $mess[$#mess] =~ s/:(?!\n)$/: $!\n/;
    return @mess;
}

sub xdie {
    die xwarndie_mess @_;
}

sub xwarn {
    warn xwarndie_mess @_;
    $Exit ||= 1;
}

sub usage {
    xwarn @_ if @_;
    die $Usage;
}

sub init {
    srand;
}

# Perform header continuation on STRING and return the result.

sub cont {
    my $s = join '', @_;
    $s =~ s/\n(\S)/\n\t$1/g;
    return $s;
}

# Quote STRING as a MIME token and return the result.

sub token_quote {
    my $s = shift;
    # XXX Characters \200-\377 aren't actually valid.
    if ($s =~ /[\040\000-\037\177\200-\377()<>@,;:\\\"\/\[\]?=]/) {
	$s =~ s/([\"\015\\])/\\$1/g;
	$s = qq["$s"];
    }
    return $s;
}

# Choose an appropriate encoding for STRING and return it.  If DESCRIBE_ONLY
# is given, don't suggest an encoding that would actually change the data,
# just return an encoding appropriate for what the data already is.

sub choose_encoding {
    my ($s, $describe_only) = @_;

    return '7bit' if $s eq '';

    # Use 7bit if possible.
    return '7bit' if $s !~ /[^\011\012\040-\176]/	# valid chars
			&& $s =~ /\n\Z/			# trailing \n
			&& $s !~ /[ \t]\n/		# no trailing spaces
    	    	    	&& $s !~ /^.{81,}$/m;		# no long lines

    # If requested, just classify other data as either 8bit or binary,
    # don't suggest an actual encoding for it.
    if ($describe_only) {
	return '8bit' if $s !~ /[\x00\x0d]/		# no nulls or CRs
			    && $s !~ /^.{999,}/m;	# no long lines
    	return 'binary';
    }

    # QP can't represent a bytestream which lacks a trailing newline.
    return 'base64' unless $s =~ /\n\Z/;

    # Use QP if it's mostly ASCII.
    my $n = $s =~ tr/\000-\010\013-\037\200-\377//;
    return 'quoted-printable' if $n / length($s) < 0.25;

    return 'base64';
}

# Given HER_ENCODING and STRING, encode and return the string (choosing
# an encoding if none is specified).

sub encode {
    my ($encoding, $s) = @_;
    my ($lc_encoding);

    $encoding = choose_encoding $s if $encoding eq '';
    $lc_encoding = lc $encoding;

    if ($lc_encoding =~ /^(7bit|8bit|binary)$/) {
	# do nothing
    }
    elsif ($lc_encoding eq 'quoted-printable') {
	require MIME::QuotedPrint;
	$s = MIME::QuotedPrint::encode($s);
    }
    elsif ($lc_encoding eq 'base64') {
	require MIME::Base64;
	$s = MIME::Base64::encode($s);
    }
    else {
	xdie "invalid encoding $encoding\n";
    }

    return $encoding, $s;
}

# Given REF (a reference to an array of strings) and SUBSTRING, return
# false if the substring appears in any of the strings.

sub try_boundary {
    my ($rpart, $boundary) = @_;

    for (@$rpart) {
	return 0 if index($_, $boundary) >= 0;
    }
    return 1;
}

# Given REF (a reference to an array of strings), return a string which
# doesn't appear in any of the strings.

sub choose_boundary {
    my ($rpart) = @_;

    # Try words from the built in dictionary to avoid yucky random
    # boundary strings.  NB:  This is destructive to @Word.
    for (my $try = 0; $try < 100 && @Word; $try++) {
	my $word = splice @Word, rand @Word, 1;
	if (try_boundary $rpart, $word) {
	    return $word;
	}
    }

    # That failed, she's probably mailing me.  Fall back to randomness.
    for (my $try = 0; $try < 100; $try++) {
	my $word = join '', map { $Alphabet[rand @Alphabet] } 0..9;
	if (try_boundary $rpart, $word) {
	    return $word;
	}
    }

    xdie "can't find a reasonable boundary\n";
}


{ my $mts;
sub choose_type {
    my ($path) = @_;

    $mts ||= do {
	require MIME::Types;
	MIME::Types->new(only_complete => 1)
    };

    my $mt = $mts->mimeTypeOf($path)
	or return;
    return $mt->type;
} }

sub choose_attachment_name {
    my ($path) = @_;

    require File::Basename;
    return File::Basename::basename($path);
}

sub process {
    my @args = @_;

    if (@args == 1 && $args[0] eq '--version') {
	print "$Me $VERSION\n";
	return 0;
    }

    my ($switch, $arg, $subject, @to, @cc, @bcc, $header, $part_header,
	$output, $subpart, $embedded_to, $encoding, @part, $type, $attach_name,
	@output, $multipart, $multipart_encoding, @recip, $prelude);

    $output = 0;
    $subpart = 0;
    $embedded_to = 0;
    $subject = '';
    $header = '';
    $prelude = '';

    $multipart = 'multipart/mixed';
    $multipart_encoding = '7bit';

    # per-part
    $encoding = $part_header = $type = $attach_name = undef;

    while (@args) {
    	$switch = shift @args;

	$switch =~ /^-/ or usage "invalid arg (non-switch) $switch\n";

	# switches which don't take args
    	if ($switch eq '--debug') {
	    $Debug = 1;
	    next;
	}
	elsif ($switch eq '--help') {
    	    usage;
	    # not reached
	    next;
	}
	elsif ($switch eq '--output') {
	    $output = 1;
	    next;
	}
	elsif ($switch eq '--subpart') {
	    $subpart = $output = 1;
	    next;
	}
	elsif ($switch eq '--embedded-to') {
	    $embedded_to = 1;
	    next;
	}
	elsif ($switch eq '--version') {
	    print "$Me $VERSION\n";
	    xdie "--version specified with other switches\n";
	}

	# switches which do take args
	@args or xdie "invalid trailing arg (invalid switch or arg needed) ",
	    	    	"$switch\n";
	$arg = shift @args;
	if ($switch eq '--attachment') {
	    $attach_name = $arg;
	}
	elsif ($switch eq '--bcc') {
	    push @bcc, $arg;
	}
	elsif ($switch eq '--cc') {
	    push @cc, $arg;
	}
	elsif ($switch eq '--encoding') {
	    $encoding = $arg;
	}
	elsif ($switch eq '--header') {
	    $header .= $arg;
	    $header .= "\n" unless $header =~ /\n\Z/;
	}
	elsif ($switch eq '--multipart') {
	    $multipart = $arg;
	}
	elsif ($switch eq '--part-header') {
	    $part_header .= $arg;
	    $part_header .= "\n" unless $part_header =~ /\n\Z/;
	}
	elsif ($switch eq '--prelude') {
	    $prelude .= $arg;
	    $prelude .= "\n" unless $prelude =~ /\n\Z/;
	}
	elsif ($switch eq '--subject') {
	    $subject = $arg;
	}
	elsif ($switch eq '--to') {
	    push @to, $arg;
	}
	elsif ($switch eq '--type') {
	    $type = $arg;
	}

	elsif ($switch =~ /^--(subpart-)?(file(-auto|-attach)?|string)\z/) {
	    my ($body, $actual_encoding);
	    my $is_sub		= ($switch =~ s/^--subpart-/--/);
	    my $is_attach	= $switch eq '--file-attach';
	    my $is_auto_type	= $is_attach || $switch eq '--file-auto';

	    if ($switch eq '--string') {
		$body = $arg;
	    }
	    else {
		if ($is_auto_type) {
		    $type = choose_type($arg)
				|| $type
				|| 'application/octet-stream';
		}
		if ($is_attach) {
		    $attach_name ||= choose_attachment_name $arg;
    	    	}

		require FileHandle;
		# This will work with an arbitrarily weird file name,
		# but it will also allow 'zcat file.gz|' to work.
		my $fh = FileHandle->new($arg, 'r')
			    || FileHandle->new($arg)
			    || xdie "can't read $arg:";
		{
		    local $/;
		    $body = <$fh>;
		}
		close_die $fh, $arg;
	    }

	    my $type_extra = '';
	    if (defined $attach_name) {
		$part_header .= "Content-Disposition: attachment; filename="
				. token_quote($attach_name) . "\n";
		# This might not be strictly safe (maybe a particular
		# type uses name to mean something else), but it's
		# required by some mail clients.
		$type_extra .= "; name=" . token_quote $attach_name;
	    }

	    my $p = defined $part_header ? $part_header : '';

	    if ($is_sub) {
		for ([$encoding, 'an encoding'], [$type, 'a type']) {
		    defined $_->[0]
			and xdie "can't specify $_->[1] for input subparts\n";
		}
		$actual_encoding = choose_encoding $body, 1;
	    }
	    else {
		$encoding	= ''		unless defined $encoding;
		$type		= 'text/plain'	unless defined $type;

		($actual_encoding, $body) = encode $encoding, $body;

		$type .= $type_extra;
		$p .= cont "Content-Type: $type\n"
		    unless $type =~ m|^\s* text/plain
					(?:\s* ; \s* charset=(\")?us-ascii\1)?
					\s*$|xi;

		$p .= cont "Content-Transfer-Encoding: $actual_encoding\n"
		    unless lc($actual_encoding) eq '7bit';

		$p .= "\n";
	    }

	    if (lc($actual_encoding) eq 'binary') {
		$multipart_encoding = 'binary';
	    }
	    elsif (lc($actual_encoding) eq '8bit') {
		$multipart_encoding = '8bit'
		    unless $multipart_encoding eq 'binary';
	    }

	    $p .= $body;
	    push @part, $p;

	    $encoding = $part_header = $type = $attach_name = undef;
	}

	else {
	    xdie "invalid switch $switch\n";
	}
    }

    defined $encoding		and xdie "useless trailing --encoding\n";
    defined $part_header	and xdie "useless trailing --part-header\n";
    defined $type		and xdie "useless trailing --type\n";
    # Don't choke if --prelude was specified but it turned out not to be
    # multipart, that's allowed.

    @recip = (@to, @cc, @bcc)	or xdie "no recipients specified\n"
	unless $output || $embedded_to;
    unshift @recip, '-t' if $embedded_to;

    push @output, cont "To: ", join(", ", @to), "\n" if @to;
    push @output, cont "Cc: ", join(", ", @cc), "\n" if @cc;
    push @output, cont "Subject: $subject\n" if $subject ne '';

    push @output, $header if $header ne '';
    push @output, "MIME-Version: 1.0 ($Me $VERSION)\n"
	unless $subpart;

    # empty body
    if (@part == 0) {
	push @output, "\n";
    }

    # single part
    elsif (@part == 1) {
	push @output, $part[0];
    }

    # multipart
    else {
	push @part, $prelude;
	my $boundary = choose_boundary \@part;
	pop @part;

	push @output, cont "Content-Type: $multipart; boundary="
			    . token_quote($boundary) . "\n";
	push @output, "Content-Transfer-Encoding: $multipart_encoding\n"
	    unless $multipart_encoding eq '7bit';

	push @output, "\n$prelude" if $prelude ne '';

	for (@part) {
	    push @output, "\n--$boundary\n";
	    push @output, $_;
	}
	push @output, "\n--$boundary--\n";
    }

    # It's possible to wind up with a message with no trailing newline
    # (by explicitly giving an --encoding for a single part message
    # which lacks the trailing newline).  Add the newline in that case.
    push @output, "\n" unless $output[-1] =~ /\n\Z/;

    my ($fh, $cmd);
    if ($output) {
	$fh = *STDOUT;
	$cmd = 'stdout';
    }
    else {
	my $pid = open SENDMAIL, '|-';
	defined $pid or xdie "can't fork:";
	if (!$pid) {
    	    $ENV{PATH} = '/usr/local/bin:/bin:/usr/bin'
    	    	if !defined $ENV{PATH};
	    $ENV{PATH} .= ':/usr/sbin:/usr/lib';
	    exec qw(sendmail -oi), @recip
		or xdie "can't run sendmail:";
	}
	$fh = *SENDMAIL;
	$cmd = 'sendmail';
    }
    {
	local $SIG{PIPE} = 'IGNORE';
	for (@output) {
	    print $fh $_ or xdie "error writing to $cmd:";
	}
    }
    close_die $fh, $cmd;
}

sub main {
    init;
    process @ARGV;
    return 0;
}

$Exit = main || $Exit;
$Exit = 1 if $Exit && !($Exit % 256);
exit $Exit;
