#!/usr/bin/env python
import csv


def dedup_bindings(filename):
    """Collapse rows of a CSV file that share a binding id, rewriting it in place.

    The file is expected to start with a header line followed by six-column
    rows: template id, template size, binding id, binding size, expansion id
    and expansion size.  Rows with the same binding id are merged into one
    row, emitted at the position of the first of them:

    * the template ids are sorted and joined with ``+``;
    * the template sizes and the expansion sizes are summed as integers;
    * binding id, binding size and expansion id come from the first row.

    Any number of rows may share a binding id.  Rows whose binding id is
    unique are written back with their values as parsed.  Blank lines are
    skipped.

    The complete output is computed before the file is reopened for writing,
    so malformed input raises without truncating the file.  A completely
    empty file (no header line) is left untouched.

    Args:
        filename: Path of the CSV file to deduplicate in place.

    Raises:
        ValueError: If a non-blank row does not have exactly six fields, or
            if a size that has to be summed is not an integer.
    """
    field_count = 6

    # First pass: read everything, grouping rows by binding id.  Dicts keep
    # insertion order, so groups are later visited in order of first
    # appearance, which preserves the original row order of the output.
    groups = {}
    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation requires for both reading and writing.
    with open(filename, newline="") as f:
        reader = csv.reader(f, skipinitialspace=True)
        headers = next(reader, None)
        if headers is None:
            # Empty file: nothing to deduplicate and no header to write back.
            return
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            if len(row) != field_count:
                raise ValueError(
                    "{}: line {}: expected {} fields, got {}".format(
                        filename, reader.line_num, field_count, len(row)
                    )
                )
            groups.setdefault(row[2], []).append(row)

    # Merge each group in memory; any int() failure happens here, before the
    # original file is touched.
    merged_rows = []
    for binding_id, group in groups.items():
        template_id, template_size, _, binding_size, expansion_id, expansion_size = group[0]
        if len(group) > 1:
            template_id = "+".join(sorted(row[0] for row in group))
            template_size = sum(int(row[1]) for row in group)
            expansion_size = sum(int(row[5]) for row in group)
        merged_rows.append(
            (template_id, template_size, binding_id, binding_size, expansion_id, expansion_size)
        )

    # Second pass: overwrite the original file with the merged rows.
    with open(filename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(merged_rows)


# Deduplicate each of the generated statistics files in place, in order.
for stats_path in ("outputs/normalised_stats.csv", "outputs/normalised2_stats.csv"):
    dedup_bindings(stats_path)
