#!/usr/bin/perl
use warnings;
use strict;
use XML::SAX;
#use XML::Simple qw(:strict);

# Structure: 'page' => { 'AmericanSamoa' => { 'change' => [ { 'diffmag' => '10781',

package CatAggr; ###############################################################
use IO::Handle;

sub load { my ($proto, $in) = @_;
#	my $xp = XML::Simple->new(KeyAttr => {page => 'title'}, ForceArray => ['page', 'change']);
#	my $xml = $xp->XMLin($in);
	# Let's try still doing it in-memory (smaller change to make),
	# but without XML::Simple's dubiousness
	my $handler = CatAggr::Handler->new();
	my $parser = XML::SAX::ParserFactory->parser( Handler => $handler );
	#$parser->parse_uri($in);
	$parser->parse_file(IO::Handle->new_from_fd(fileno(STDIN), 'r')); # Sigh. Hard-coding FTW.
	my $xml = $handler->results();
	bless({xml => $xml->{'page'}}, (ref($proto) or $proto));
}

sub pagehash { my ($this) = @_;
	$this->{xml};
}

package CatAggr::Handler; ######################################################
use Switch;
use base qw(XML::SAX::Base);

# Could really do with an 'enum' pragma package :/
use constant BOGGLE => 0; # UNKNOWN, but with a more Perl-ish name
use constant OUTER  => 1;
use constant MWCATS => 2;
use constant PAGE   => 3;
use constant CHANGE => 4;

sub new { my ($proto) = @_;
	bless({doctree => undef}, (ref($proto) or $proto));
}

sub results { (shift)->{doctree}; }

# http://search.cpan.org/~grantm/XML-SAX-0.15/SAX/Intro.pod#Callback_Parameters
sub start_document { my ($self, $doc) = @_;
	$self->{state} = OUTER;
	$self->{prevs} = BOGGLE;
	$self->{depth} = 0; # Of boggleworthyness
	$self->{chngar} = 0; # Arrayref to current page's array of changes,
		# to avoid so much indirection on each change. Starts zero so
		# that if something goes screwy and it's used uninitialised,
		# we blow up, rather than trample a disconnected or
		# previously-seen array.
	$self->{doctree} = {};
	$self->{dots} = 0; # Counts when to draw a dot. That's all. Really.
}

sub end_document { my ($self, $doc) = @_;
	# Clear up our line of progress indicator dots. Thassall.
	print STDERR "\n";
}

# The categorised output is simple enough that we only need to deal with
# elements (and not many of them), and their attributes.
sub start_element { my ($self, $el) = @_;
	my $unknown = 0;

	switch ($self->{state}) {
		# print STDERR '<'.$el->{LocalName}.'>'; # Uber-verbose trace
		case BOGGLE { $unknown = 1; }
		case OUTER {
			if($el->{LocalName} eq 'mwcats') {
				$self->{state} = MWCATS;
			} else { $unknown = 1; }
		}
		case MWCATS {
			if($el->{LocalName} eq 'page') {
				if($self->{dots} == 9) {
					print STDERR '.';
					$self->{dots} = 0;
				} else { $self->{dots}++; }
				# The award for "Most Freaking Awkward
				# Attributes Structure" goes to XML::SAX.
				my $pname = $el->{Attributes}->{'{}title'}->{Value};
				# print STDERR "\t$pname\n"; # Noisy debug spew
				# Thanks to the wonders of references, as we
				# fill in $self->{chngar}, we also fill in the
				# correct part of the doctree at zero cost.
				$self->{chngar} = [];
				my $pagehash = { change => $self->{chngar} };
				$self->{doctree}->{page}->{$pname} = $pagehash;
				# Don't forget to actually change state
				$self->{state} = PAGE;
			} else { $unknown = 1; }
		}
		case PAGE {
			if($el->{LocalName} eq 'change') {
				# Ok, slurp in all the attributes
				my $change = {};
				my $attrs = $el->{Attributes};
				foreach my $k (keys %$attrs) {
					$change->{$attrs->{$k}->{LocalName}}
						= $attrs->{$k}->{Value};
				}
				push @{$self->{chngar}}, $change;
				# State again
				$self->{state} = CHANGE;
			} else { $unknown = 1; }
		}
		case CHANGE { $unknown = 1; } # No subelements expected
		else { die 'Got into unknown state '.$self->{state}; }
	}

	if($unknown) {
		if($self->{state} == BOGGLE) {
			$self->{depth}++;
		} else {
			$self->{prevs} = $self->{state};
			$self->{state} = BOGGLE;
			$self->{depth} = 1;
		}
	}
}

sub end_element { my ($self, $el) = @_;
	switch($self->{state}) {
		case BOGGLE {
			# If depth reaches zero, restore to known state
			if(!--$self->{depth})
				{ $self->{state} = $self->{prevs}; }
		}
		case OUTER { # This would mean that the state machine got futzed
			die 'Unexpected end of '.$el->{LocalName}.' outside of document root element';
		}
		case MWCATS { $self->{state} = OUTER;  }
		case PAGE   {
			$self->{state} = MWCATS;
			# Clearing chngar releases a ref, and helps find errors
			$self->{chngar} = 0;
			# Useful for debugging the generated structure:
			#use Data::Dumper; die Dumper($self->results());
		}
		case CHANGE { $self->{state} = PAGE;   }
		else { die 'Got into unknown state '.$self->{state}; }
	}
}

package CatAggr::Page; #########################################################

sub wrap { my ($proto, $hash) = @_;
	bless({page => $hash}, (ref($proto) or $proto));
}

sub changearr { my ($this) = @_;
	$this->{page}->{change};
}

package CatAggr::Change; #######################################################

use constant ABUSE_UNKNOWN  => -1;
use constant ABUSE_NOT      =>  0;
use constant ABUSE_REVERT   =>  1;
use constant ABUSE_BLANKING =>  2;

sub wrap { my ($proto, $hash) = @_;
	bless({change => $hash}, (ref($proto) or $proto));
}

sub abuse { my ($this) = @_;
	if($this->{change}->{abuse}) {
		my $a = $this->{change}->{abusetype};
		   if($a eq 'revert'  ) { ABUSE_REVERT;   }
		elsif($a eq 'blanking') { ABUSE_BLANKING; }
		else {
			warn "Unknown abuse type '$a'.\n";
			ABUSE_UNKNOWN;
		}
	} else { ABUSE_NOT; }
}

sub byuser { my ($this) = @_;
	CatAggr::User->get($this->{change}->{byreg}, $this->{change}->{byid});
}

sub majortext  { (shift)->{change}->{majortext};  }
sub templates  { (shift)->{change}->{templates};  }
sub categories { (shift)->{change}->{categories}; }
sub pagelinks  { (shift)->{change}->{pagelinks};  }
sub urllinks   { (shift)->{change}->{urllinks};   }
sub diffmag    { (shift)->{change}->{diffmag};    }
sub newsize    { (shift)->{change}->{newsize};    }
sub revno      { (shift)->{change}->{revno};      }

package CatAggr::User; #########################################################

# Stringifiable for use as hash key
use overload q("") => \&stringify;

# Mappings from IDs to objects of this class
my %reg;
my %unreg;

# Reset all known users
sub reset { %reg = (); %unreg = (); }

# Array of all know reg and unreg users
sub allreg   { my @v = values %reg;   \@v; }
sub allunreg { my @v = values %unreg; \@v; }

# Find instance for given ID, creating if necessary
sub get { my ($proto, $registered, $id) = @_;
	if(defined(my $found = ($registered ? \%reg : \%unreg)->{$id})) { $found }
	else {
		my $self = bless({reg => $registered?1:0, id => $id}, (ref($proto) or $proto));
		($registered ? \%reg : \%unreg)->{$id} = $self;
		return $self;
	}
}

sub registered { (shift)->{reg}; }

sub stringify { my ($this) = @_;
	($this->{reg} ? 'R' : 'U') . $this->{id};
}

sub unstringify { my ($proto, $string) = @_;
	my ($first, $rest) = split(//, $string, 2);
	if(defined(my $found = ($first eq 'R' ? \%reg : \%unreg)->{$rest})) { $found }
	else {
		warn "Couldn't unstringify '$string'---not previously stringified?\n";
		undef;
	}
}

1;

