#!/usr/bin/env python
from __future__ import unicode_literals, print_function, division
import csv
from collections import defaultdict


def read_stats_file(stats_filename, out_filename):
    """Aggregate a per-row stats CSV into per-template average sizes.

    The input is read with ``csv.reader`` (``skipinitialspace=True``); its
    first row is treated as a header and discarded.  Every following
    non-blank row must have exactly six fields, in this order::

        template_id, template_size, binding_id, binding_size,
        expansion_id, expansion_size

    The three ``*_size`` fields are parsed with ``int``.  The two ``*_id``
    fields other than ``template_id`` are not used.

    The output CSV gets a header row, then an ``"overall"`` row (mean
    template size over the distinct templates, and mean binding and
    expansion size over all data rows), then one row per template in sorted
    ``template_id`` order holding the template size and the mean binding and
    expansion size over that template's own rows.  If a template id appears
    with different template sizes, the last one read is kept.

    When the input has no data rows, only the header row is written.

    Args:
        stats_filename: Path of the CSV file to read.
        out_filename: Path of the CSV file to write (overwritten).

    Raises:
        ValueError: If a non-blank data row does not have exactly six
            fields, or a size field is not an integer literal.
    """
    template_sizes = {}
    # Running totals and row counts per template; the per-template means
    # must be divided by that template's own row count.
    binding_totals = defaultdict(int)
    expansion_totals = defaultdict(int)
    row_counts = defaultdict(int)

    with open(stats_filename) as f:
        reader = csv.reader(f, skipinitialspace=True)
        next(reader, None)  # ignore the headers
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            (template_id, template_size, _binding_id, binding_size,
             _expansion_id, expansion_size) = row
            template_sizes[template_id] = int(template_size)
            binding_totals[template_id] += int(binding_size)
            expansion_totals[template_id] += int(expansion_size)
            row_counts[template_id] += 1

    count = sum(row_counts.values())

    with open(out_filename, "w") as f:
        writer = csv.writer(f)
        writer.writerow(["template", "template_size", "average_binding_size", "average_expansion_size"])
        if not count:
            # No data rows: there is nothing to average, so emit the header
            # alone instead of dividing by zero.
            return
        writer.writerow([
            "overall", sum(template_sizes.values()) / len(template_sizes),
            sum(binding_totals.values()) / count,
            sum(expansion_totals.values()) / count,
        ])
        for template_id in sorted(template_sizes):
            rows_for_template = row_counts[template_id]
            writer.writerow([
                template_id, template_sizes[template_id],
                binding_totals[template_id] / rows_for_template,
                expansion_totals[template_id] / rows_for_template,
            ])

# Summarise each normalised stats file into its matching totals file,
# in the listed order.
for stats_path, totals_path in (
    ("outputs/normalised_stats.csv", "outputs/totals_stats.csv"),
    ("outputs/normalised2_stats.csv", "outputs/totals2_stats.csv"),
):
    read_stats_file(stats_path, totals_path)
