#!/usr/bin/perl

# Tidy Proxy 0.94
# Copyright (C) 2002-2003  Alexander Kreuzer <alex@freesources.org>
# This program is free software.  You may copy or
# redistribute it under the same terms as Perl itself

use strict;
use warnings;

use POSIX qw/:sys_wait_h setsid/;
use FileHandle;
use IO::Select;
use IO::Pipe;
use IPC::Open3;
use Getopt::Long;
use Pod::Usage;

use HTTP::Daemon; # from LWP
use HTTP::Status;
use LWP::UserAgent;
use HTML::TreeBuilder;

sub handle_client($);
sub tidy($);
sub validate($);
sub gen_output($$$);
sub logmsg;

my $listen_host = 'localhost';
my $listen_port = 9090;
my $tidy_level = 1; # 1 for warnings and errors
                    # 2 for errors
my $tidy_cmd = '/usr/bin/tidy';
my $validate_cmd = '/usr/bin/validate';
my $HTML_DTD = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">';

my $action = 't';
my $action_func;
my $help = 0;
my $nodaemonize = 0;
my $pid_file;
my $server_front;
my $loc_rewrite;

GetOptions('host=s' => \$listen_host,
	   'port|p=i' => \$listen_port,
	   'nodaemon|d' => \$nodaemonize,
	   'level|l=i' => \$tidy_level,
	   'action=s' => \$action,
	   'tidy-cmd=s' => \$tidy_cmd,
	   'validate-cmd=s' => \$validate_cmd,
	   'pid=s' => \$pid_file,
	   'dest-host=s' => \$server_front,
	   'loc-rewrite' => \$loc_rewrite,
	   'help|h|?' => \$help
	  )
  or pod2usage(2);

pod2usage(1) if ($help);
unless ($tidy_level == 1 or $tidy_level == 2) {
  pod2usage('Tidy Level must be 1 or 2');
}

if ($action =~ /^t$/io) {
  unless (-x $tidy_cmd) {
    print STDERR ("Error: tidy command not found\n");
    exit(4);
  }
  $action_func = \&tidy;
}
elsif ($action =~ /^v$/io) {
  unless (-x $validate_cmd) {
    print STDERR ("Error: validate command not found\n");
    exit(4);
  }
  $action_func = \&validate;
}
else {
  pod2usage('Action must be eigther v or t');
  pod2usage(4);
}

if (not defined $server_front and $loc_rewrite) {
  pod2usage('--loc-rewrite must be used with --dest-host');
  pod2usage(4);
}

unless ($nodaemonize) {
  setsid() ;
  chdir('/');
  open STDIN, '/dev/null';
  open STDOUT, '> /dev/null';
  open STDERR, '> /dev/null';

  $_ = fork;
  if (defined $_) {
    if ($_) {
      exit;
    }
    elsif ($pid_file) {
      open(PID_FILE, "> $pid_file") or warn "Could not open pid file: $!";
      print PID_FILE $$;
      close PID_FILE;
    }
  }
  else {
    die ('Could not fork: $!');
  }
}

logmsg 'notice', 'tidy-proxy started';

my $daemon = new HTTP::Daemon(LocalAddr => $listen_host, LocalPort => $listen_port, ReuseAddr => 1)
  or die "Could not start Daemon: $!";
my $agent = new LWP::UserAgent(agent => 'TidyProxy'); # maybe allow user to set more options

sub REAPER{
  local $_ = waitpid -1, WNOHANG;
  warn 'waitpid error' if ($_ == -1);
};

$SIG{CHLD} = \&REAPER;

while(1) {
  my $client = $daemon->accept;
  next unless($client);
  $_ = fork;
  die "Could not fork: $!" unless defined $_;
  unless ($_) {

    $SIG{CHLD} = 'DEFAULT'; # handle_client calls waitpid self

    handle_client($client);
    exit 0;
  }
}

my $handle_request_sent_header;
my $handle_request_data;
my $handle_request_client;
my $handle_request_pipe;

sub handle_response_data {
  my $select = IO::Select->new($handle_request_pipe);
  IO::Select->select($select, undef, $select);
  if (defined(read $handle_request_pipe, $_, 8129)) {
    return $_;
  }
  else {
    return undef;
  }
}

sub handle_request_data {
  my ($data, $resp, $protocol) = @_;
  local $_;

  if ($handle_request_data or $resp->header('Content-Type') =~ /^text\/html/) {
    $handle_request_data .= $data;
  }
  else {
    unless ($handle_request_sent_header) {
      $handle_request_pipe = IO::Pipe->new;
      if ($_ = fork) {
	$handle_request_pipe->writer();
	$handle_request_pipe->autoflush();
	$handle_request_sent_header = 1;
      }
      elsif (defined $_) {
	$handle_request_pipe->reader();
	$resp->content(\&handle_response_data);
	$handle_request_client->send_response($resp);
	exit 0;
      }
      else {
	warn "Could not fork $!";
      }
    }
    print $handle_request_pipe $data;
  }
}

sub handle_client($) {
  local $_;
  my $client = shift;
  my $conn_host;
  while (my $req = $client->get_request) {
    logmsg('info', 'Got Request: ' . $req->uri->as_string . " on pid $$");
    $req->remove_header('Accept-Encoding');

    if (defined $server_front) {
      $req->uri("http://$server_front" . $req->uri->path);
      $conn_host = $req->header('Host' => $server_front) if ($loc_rewrite);
      #$req->remove_header('Referer');
    }
    else {
      $req->remove_header('Proxy-Connection');
    }

    $handle_request_sent_header = undef;
    $handle_request_data = undef;
    $handle_request_client = $client;
    $SIG{CHLD} = \&REAPER;

    my $resp = $agent->send_request($req, \&handle_request_data);

    $SIG{CHLD} = 'DEFAULT';
    close $handle_request_pipe if (defined($handle_request_pipe));
    $resp->content($handle_request_data);

    if ($loc_rewrite and defined $server_front and $resp->header('Location')) {
      $_ = new URI($resp->header('Location'));
      if ($_->host =~ /^${server_front}$/io) {
	$_->host($conn_host);
	$resp->header('Location', $_);
      }
    }

    if ($resp->is_success and $handle_request_data and $resp->header('Content-Type') =~ /^text\/html/) {
      $_ = &$action_func($handle_request_data);
      if ($_->[3]) {
	$resp = HTTP::Response->new(200, 'OK', HTTP::Headers->new(Content_Type => 'text/html'), gen_output($req, $resp, $_));
      }
    }
    unless ($handle_request_sent_header) {
      $client->send_response($resp);
    }
  }
  $client->close;
}

sub tidy($) {
  local $_;
  $_ = systemex::systemex($tidy_cmd, shift());
  push @$_, grep { /\d+ warnings?, \d+ errors? were found\!/o } split /\n/, $_->[2] if ($_->[0] >= $tidy_level);
  return $_;
}

sub validate($) {
  local $_;
  $_ = systemex::systemex($validate_cmd, shift());
  push @$_, "$_->[0] Error(s)";
  return $_;
}

sub gen_output($$$) {
  my $req = shift;
  my $uri = $req->uri->as_string;
  my $resp = shift;
  my $tr = shift;

  my $button_element = HTML::Element->new('form');
  $button_element->push_content(HTML::Element->new('input', type => 'button', value => 'Toggle Output', onClick => 'tidy_proxy_toggle_show()'));

  my $script_element = HTML::Element->new('script', type => 'text/javascript');
  $script_element->push_content(
<<'SCRIPT'
window.document.getElementById("tidy_proxy_output").style.display = "none";

function tidy_proxy_toggle_show() {
   if (window.document.getElementById("tidy_proxy_output").style.display == "none") {
      window.document.getElementById("tidy_proxy_output").style.display = "block";
   }
   else {
      window.document.getElementById("tidy_proxy_output").style.display = "none";

   }
}
SCRIPT
			  );

  my @orig_html = split "\n", $resp->content();
  for(local $_ = 0; $_ < @orig_html; $_++) {
    $orig_html[$_] = $_ + 1 . ': ' . $orig_html[$_];
  }

  my $main_element = HTML::Element->new('div', style => 'background-color:white; background-image:none; color:black');
  $main_element->push_content([ 'p', $tr->[3] ],
			      $button_element,
			      [ 'table', { id => 'tidy_proxy_output', border => '1' },
				[ 'tr',
				  [ 'td', { colspan => 2 },
				    [ 'pre', $tr->[2] ]
				  ]
				],
				[ 'tr',
				  [ 'td', { valign => 'top' }, [ 'pre', $tr->[1] ] ],
				  [ 'td', { valign => 'top' }, [ 'pre', join "\n", @orig_html ] ],
				]
			      ],
			      $script_element,
			      ['hr']
			     );

  my $tree = HTML::TreeBuilder->new_from_content($resp->content());
  foreach $_ ($tree->content_list()) {
    if ($_->tag() =~ /^body$/oi) {
      $_->unshift_content($main_element);
      last;
    }
  }
  $_ = $HTML_DTD . $tree->as_HTML;
  $tree->delete;
  return $_;
}

sub logmsg {
  my ($l, @a) = @_;
  ($l, @a) = @_;
  print STDERR @a, "\n";
}


package systemex;

use IPC::Open3;
use IO::Select;
use Fcntl;

sub systemex($;$@) {
  my $prog = shift;
  my $code = shift;
  my @args = @_;
  my ($w, $r, $e) = (FileHandle->new, FileHandle->new, FileHandle->new);
  my ($ro, $eo);
  my $pid = open3($w, $r, $e, $prog, @args) or die "Could not start tidy: $!";
  fcntl($w, F_SETFL, O_NONBLOCK) or warn "Could not fcntl: $!";
  fcntl($r, F_SETFL, O_NONBLOCK) or warn "Could not fcntl: $!";
  fcntl($e, F_SETFL, O_NONBLOCK) or warn "Could not fcntl: $!";

  my $w_select = IO::Select->new($w);
  my $r_select = IO::Select->new($r, $e);
  my $w_pos = 0;
  while(1) {
    my $count = 0;

    if ($w_pos < length($code)) {
      IO::Select::select($r_select, $w_select, undef);

      while ($_ = syswrite($w, $code, length($code) - $w_pos, $w_pos)) {
	$w_pos += $_;
	$count += $_;
	unless ($w_pos < length($code)) {
	  $w->close or warn "Could not close write handle: $!";
	  last;
	}
      }
    }
    else {
      $r_select->can_read;
    }

    my $buf;
    while($_ = sysread($r, $buf, 1024)) {
      $ro .= $buf;
      $count += $_;
    }
    while($_ = sysread($e, $buf, 1024)) {
      $eo .= $buf;
      $count += $_;
    }
    last unless ($count);
  }

  waitpid($pid, 0) != -1 or warn "Waitpid faild: $!";
  my $exitcode = $? >> 8;

  return [ $exitcode, $ro, $eo ];
}

1;

__END__

=head1 NAME

tidy-proxy - html tidy proxy

=head1 SYNOPSIS

S<B<tidy-proxy> [--host hostname] [-p port] [-d] [-l {1|2}] [--action {t|v}] [--tidy-cmd tidy-command] [--validate-cmd validate command] [--pid pid-file]>

S<B<tidy-proxy> -h>

=head1 OPTIONS

=over 4

=item B<--host> I<host>

The host paramter sets the listening address for tidy-proxy.
default: localhost

=item B<-p>, B<--port> I<port>

port sets the listening port for tidy-proxy.
default: 9090

=item B<-d>, B<--nodaemon>

run tidy-proxy in foreground

=item B<-l>, B<--level> I<level>

level sets the filtering level for tidy-proxy
1: Warnings
2: Errors

=item B<--action> I<t|v>

set if to use I<t>idy or I<v>alidate
Default: tidy

=item B<--tidy-cmd> I<cmd>

Command to use for tidy.
Default: F</usr/bin/tidy>

=item B<--validate-cmd> I<cmd>

Command to use for validate.
Default: F</usr/bin/validate>

=item B<--pid> I<pid-file>

Create a pid file.
Works only in daemon mode.

=item B<--dest-host> I<destination host>

Run tidy-proxy in reverse-proxy mode.
Tidy-proxy acts as normal webserver and forwards ervery request
to I<destionation host>.

=item B<--loc-rewrite>

rewrite the Location and the Host header in reverse-proxy mode

=item B<-h>, B<-?>, B<--help>

Prints help message.

=head1 COPYRIGHT

Copyright 2002-2003, Alexander Kreuzer <alex@freesources.org>

This program is free software.  You may copy or
redistribute it under the same terms as Perl itself

=cut
