import matplotlib.pyplot as plt
import os
import re

# Serialisation formats to process. An empty-string entry ("") is used to
# generate the number of triples; the full list used previously was:
# formats = ["", "json", "provn", "ttl"]
formats = ["provn"]

# Fill colours for the bars; the plotting functions index this list by the
# position of the bar group being drawn.
colours = ['#550000', '#801515', '#D46A6A',
           '#042037', '#123652', '#496D89',
           '#405100', '#647A14', '#B6CB66',
           '#1C053A', '#321456', '#6B4E90',
           '#3F002E', '#5F1049', '#9F4F89',
           '#eee999', '#aaa888']

# Hatch patterns for the template, binding and expansion bars, in that order.
hatches = ['////', '\\\\\\\\', '//']


#def setHatchThickness(value):
#    libpath = plt.__path__[0]
#    backend_pdf = libpath + "/backends/backend_pdf.py"
#    with open(backend_pdf, "r") as r:
#        code = r.read()
#        code = re.sub(r'self\.output\((\d+\.\d+|\d+)\,\ Op\.setlinewidth\)',
#                   "self.output(%s, Op.setlinewidth)" % str(value), code)
#        with open('/tmp/hatch.tmp', "w") as w:
#            w.write(code)
#        print backend_pdf
#        os.system('sudo mv /tmp/hatch.tmp %s' % backend_pdf)


#setHatchThickness(1.0)
def generate_stats(info):
    """Produce the statistics files for both binding variants.

    ``generate_s`` is invoked once with binding type 0 and once with binding
    type 1; ``info`` is handed through to it unchanged.
    """
    for binding_type in (0, 1):
        generate_s(info, binding_type)


def _stat_paths(binding_type):
    """Return ``(bindings_output, normalised_csv, totals_csv)`` for a binding type.

    Binding type 0 selects the primary file set; any other value selects the
    "2"-suffixed set.
    """
    if binding_type == 0:
        return ("outputs/bindings_output.txt",
                'outputs/normalised_stats.csv',
                'outputs/totals_stats.csv')
    return ("outputs/bindings2_output.txt",
            'outputs/normalised2_stats.csv',
            'outputs/totals2_stats.csv')


def _read_size_table(path):
    """Parse ``<file path>, <size>`` lines of ``path`` into ``{name: size}``.

    ``name`` is the last path component up to its first dot and ``size`` is
    the stripped text of the second field. Blank lines are ignored.
    """
    table = {}
    with open(path) as size_file:
        for line in size_file:
            if not line.strip():
                continue
            fields = line.split(', ')
            name = fields[0].strip().split("/")[-1].split(".")[0]
            table[name] = fields[1].strip()
    return table


def _floor_average(values):
    """Return the floored integer mean of a non-empty list of ints."""
    # Floor division keeps the CSV values integral; prov_sum_aggregator_bytes
    # parses them back with int().
    return sum(values) // len(values)


def _write_totals(path, sizes_by_template):
    """Write the header, the overall averages and one row per template."""
    rows = ['template, template_size, average_binding_size, average_expansion_size\n']

    if sizes_by_template:
        all_template_sizes = []
        all_binding_sizes = []
        all_expansion_sizes = []
        template_rows = []

        for temp_name, sizes in sizes_by_template.items():
            # extend() copies the values into fresh accumulators, so the
            # per-template lists handed back to the caller are never mutated.
            all_template_sizes.append(sizes['template_size'])
            all_binding_sizes.extend(sizes['binding_size'])
            all_expansion_sizes.extend(sizes['expansion_size'])
            template_rows.append('{0}, {1}, {2}, {3}\n'.format(
                temp_name,
                sizes['template_size'],
                _floor_average(sizes['binding_size']),
                _floor_average(sizes['expansion_size'])))

        rows.append('{0}, {1}, {2}, {3}\n'.format(
            'total_average',
            _floor_average(all_template_sizes),
            _floor_average(all_binding_sizes),
            _floor_average(all_expansion_sizes)))
        rows.extend(template_rows)

    # 'w' always replaces any totals file left over from an earlier run.
    with open(path, 'w') as totals_file:
        totals_file.writelines(rows)


def generate_s(info, binding_type):
    """Build the normalised and totals CSV files for one binding type.

    Each line of ``info`` holds three whitespace-separated paths (template,
    binding, expansion); the third ``/``-separated component of each path is
    looked up in the size tables read from ``outputs/templates_output.txt``,
    the bindings output file for ``binding_type`` and
    ``outputs/expansions_output.txt``. Lines naming an entry missing from any
    table are skipped, as are blank lines.

    Side effects: rewrites the normalised CSV (one row per matched line) and
    the totals CSV (overall floored averages followed by one row per
    template), and prints the parsed tables and every info line to stdout.

    Args:
        info: path of the info file.
        binding_type: 0 for the primary bindings files, anything else for the
            "2"-suffixed ones.

    Returns:
        ``{format: {template_name: {'binding_size': [int, ...],
        'expansion_size': [int, ...], 'template_size': int}}}`` with one
        entry per item of ``formats``.
    """
    bindings_output, csv, csv2 = _stat_paths(binding_type)
    templates_output = "outputs/templates_output.txt"
    expansions_output = "outputs/expansions_output.txt"

    all_binding_sizes_by_template = {}

    # The file names do not depend on the format, so every format currently
    # reads and rewrites the same files.
    for f in formats:

        templates = _read_size_table(templates_output)
        print("templates:  {0}".format(templates))

        bindings = _read_size_table(bindings_output)
        print("bindings:  {0}".format(bindings))

        expansions = _read_size_table(expansions_output)
        print("expansions:  {0}".format(expansions))

        binding_sizes_by_template = {}

        with open(csv, 'w') as stat_file:
            stat_file.write("template name, size, binding, size, expansion, size\n")

            with open(info) as info_file:
                for info_line in info_file:

                    print(len(info_line))
                    print("info line:  {0}".format(info_line))

                    paths = info_line.split()
                    if not paths:
                        continue

                    template_name = paths[0].split('/')[2]
                    binding_name = paths[1].split('/')[2]
                    expansion_name = paths[2].split('/')[2]

                    # Skipping missing data (some were manually excluded due to errors)
                    if (template_name not in templates
                            or binding_name not in bindings
                            or expansion_name not in expansions):
                        continue

                    template_size = templates[template_name]
                    binding_size = bindings[binding_name]
                    expansion_size = expansions[expansion_name]

                    sizes = binding_sizes_by_template.setdefault(
                        template_name,
                        {'binding_size': [],
                         'expansion_size': [],
                         'template_size': int(template_size)})
                    sizes['binding_size'].append(int(binding_size))
                    sizes['expansion_size'].append(int(expansion_size))

                    stat_file.write("{0}, {1}, {2}, {3}, {4}, {5}\n".format(
                        template_name,
                        template_size,
                        binding_name,
                        binding_size,
                        expansion_name,
                        expansion_size))

        _write_totals(csv2, binding_sizes_by_template)

        all_binding_sizes_by_template[f] = binding_sizes_by_template
    return all_binding_sizes_by_template


def generate_average_triples():
    """Plot the per-template rows of ``outputs/totals_stats.csv`` as bars.

    Every data row except ``total_average`` contributes three adjacent bars
    (columns 1-3 of the row) sharing one colour and distinguished by hatch.
    The figure is saved to ``outputs/triples.pdf`` and then closed.
    """
    csv2 = 'outputs/totals_stats.csv'
    # Read everything up front so the handle is closed before plotting.
    with open(csv2, 'r') as read_file:
        sep_file = read_file.read().split('\n')

    count = 1
    count2 = 0
    x = []
    labels = []

    # The first line is the CSV header.
    for d in sep_file[1:]:
        if d:
            values = d.split(',')
            if values[0] == 'total_average':
                continue

            name = values[0]
            left = count - 3
            # Wrap around the palette rather than failing when there are more
            # templates than colours.
            colour = colours[count2 % len(colours)]

            for offset, hatch in enumerate(hatches[:3]):
                plt.bar([left + offset],
                        [float(values[offset + 1])],
                        color=colour,
                        edgecolor='black', hatch=hatch)

            # The group's tick label sits under the middle bar.
            x.append(left + 1 + 0.33)
            labels.append(name)

            count2 += 1
        count += 3

    plt.title('Average Number of Triples for Templates, Bindings and \n their Expansion Normalised in ttl', y=1.08)
    plt.xlabel('Provenance (Template, Binding, Expansion)')
    plt.ylabel('Number of Triples')
    # Zero-height bars exist only to create legend entries; they use the same
    # hatches as the data bars so the legend matches what is drawn.
    plt.bar([0], [0], color="w", hatch=hatches[0], label='Templates')
    plt.bar([0], [0], color="w", hatch=hatches[1], label='Bindings')
    plt.bar([0], [0], color="w", hatch=hatches[2], label='Expansions')
    plt.xticks(x, labels, rotation='vertical')
    plt.legend(loc=2)
    plt.tight_layout(h_pad=5, w_pad=10)
    plt.savefig('outputs/triples.pdf')
    # plt.show()
    plt.close()


def prov_sum_aggregator_bytes(prov_format, binding_type):
    """Plot the totals CSV for one binding type and print summed sizes.

    Every non-empty data row of the totals file contributes three adjacent
    bars (columns 1-3) and is added to the running template, binding and
    expansion sums. The percentage difference between the binding and
    expansion sums, relative to their mean, is stored alongside the sums and
    the whole summary is printed.

    Args:
        prov_format: format name used in the plot title, the printed summary
            key and the output file name.
        binding_type: 0 reads ``outputs/totals_stats.csv`` and saves
            ``outputs/<format>_sum_size.pdf``; any other value reads
            ``outputs/totals2_stats.csv`` and saves
            ``outputs/<format>_sum_size2.pdf``.
    """
    #csv2 = 'totals_stats_{0}.csv'.format(prov_format)
    if binding_type == 0:
        csv2 = 'outputs/totals_stats.csv'
    else:
        csv2 = 'outputs/totals2_stats.csv'

    # Read everything up front so the handle is closed before plotting.
    with open(csv2, 'r') as read_file:
        sep_file = read_file.read().split('\n')

    count2 = 0
    x = []
    labels = []
    summary_details = {"template_sum": 0,
                       "binding_sum": 0,
                       "expansion_sum": 0,
                       "binding_expansion_percentage_difference": 0}

    for line_no, d in enumerate(sep_file):
        # Line 0 is the CSV header; blank lines carry no data.
        if line_no == 0 or not d:
            continue

        # NOTE(review): the 'total_average' row is plotted and summed together
        # with the per-template rows here, whereas generate_average_triples
        # skips it -- confirm this is intended.
        values = d.split(',')
        name = values[0]
        left = 3 * line_no - 3
        # Wrap around the palette rather than failing when there are more
        # rows than colours.
        colour = colours[count2 % len(colours)]

        for offset, hatch in enumerate(hatches[:3]):
            plt.bar([left + offset],
                    [float(values[offset + 1])],
                    color=colour,
                    edgecolor='black', hatch=hatch, ecolor='black')

        # The group's tick label sits under the middle bar.
        x.append(left + 1 + 0.33)
        labels.append(name)

        summary_details["template_sum"] += int(values[1])
        summary_details["binding_sum"] += int(values[2])
        summary_details["expansion_sum"] += int(values[3])

        count2 += 1

    delta = abs(float(summary_details["binding_sum"]) - float(summary_details["expansion_sum"]))
    sigma = (float(summary_details["binding_sum"]) + float(summary_details["expansion_sum"]))/2

    # With no data rows (or all-zero sizes) the mean is 0; report 0% instead
    # of dividing by zero.
    diff = (delta/sigma)*100 if sigma else 0.0

    summary_details['binding_expansion_percentage_difference'] = diff

    if binding_type == 1:
        title = "{0}_binding2".format(prov_format)
    else:
        title = prov_format
    totals = {title: summary_details}
    print(totals)

    plt.title('Total Size of Templates, Bindings and \n their Expansion Normalised in {0} in bytes'.format(prov_format), y=1.08)
    plt.xlabel('Provenance (Template, Binding, Expansion)')
    plt.ylabel('Size in Bytes')
    # Zero-height bars exist only to create legend entries.
    plt.bar([0], [0], color="w", hatch=hatches[0], label='Templates', ecolor='black')
    plt.bar([0], [0], color="w", hatch=hatches[1], label='Bindings', ecolor='black')
    plt.bar([0], [0], color="w", hatch=hatches[2], label='Expansions', ecolor='black')
    plt.xticks(x, labels, rotation='vertical')
    plt.legend(loc=2)
    plt.tight_layout(h_pad=5, w_pad=10)
    if binding_type == 0:
        plt.savefig('outputs/{0}_sum_size.pdf'.format(prov_format))
    else:
        plt.savefig('outputs/{0}_sum_size2.pdf'.format(prov_format))
    # plt.show()
    plt.close()


def average_graph(binding_type):
    """Plot the ``total_average`` row of the totals CSV for each format.

    For every non-empty entry of ``formats`` the totals file for
    ``binding_type`` is read and its ``total_average`` row is drawn as three
    adjacent bars (columns 1-3), labelled with the format name.

    Args:
        binding_type: 0 reads ``outputs/totals_stats.csv`` and saves
            ``outputs/average_sizes.pdf``; any other value reads
            ``outputs/totals2_stats.csv`` and saves
            ``outputs/average_sizes2.pdf``.
    """
    # csv = 'outputs/totals_stats_{1}.csv'.format(f)
    if binding_type == 0:
        csv = 'outputs/totals_stats.csv'
    else:
        csv = 'outputs/totals2_stats.csv'

    # Every named format currently maps to the same totals file.
    files = {}
    for f in formats:
        if f:
            files[f] = csv

    x = []
    labels = []
    for file_count, (name, file_loc) in enumerate(files.items()):
        # Read everything up front so the handle is closed before plotting.
        with open(file_loc, 'r') as read_file:
            sep_file = read_file.read().split('\n')

        # Wrap around the palette rather than failing when there are more
        # formats than colours.
        colour = colours[file_count % len(colours)]

        for line_no, d in enumerate(sep_file):
            # Line 0 is the CSV header; blank lines carry no data.
            if line_no == 0 or not d:
                continue
            values = d.split(',')
            if values[0] != 'total_average':
                continue

            left = 3 * line_no * file_count
            for offset, hatch in enumerate(hatches[:3]):
                plt.bar([left + offset],
                        [float(values[offset + 1])],
                        color=colour,
                        edgecolor='black', hatch=hatch)

            # The group's tick label sits under the middle bar.
            x.append(left + 1 + 0.33)
            labels.append(name)

    plt.title('Average Size of Templates, Bindings and \n their Expansion Normalised in {0}'.format(formats), y=1.08)
    plt.xlabel('Provenance (Template, Binding, Expansion)')
    plt.ylabel('Size in Bytes')
    # Zero-height bars exist only to create legend entries; they use the same
    # hatches as the data bars so the legend matches what is drawn.
    plt.bar([0], [0], color="w", hatch=hatches[0], label='Templates')
    plt.bar([0], [0], color="w", hatch=hatches[1], label='Bindings')
    plt.bar([0], [0], color="w", hatch=hatches[2], label='Expansions')
    plt.xticks(x, labels, rotation='vertical')
    plt.legend(loc=1)
    plt.tight_layout(h_pad=5, w_pad=10)
    if binding_type == 0:
        plt.savefig('outputs/average_sizes.pdf')
    else:
        plt.savefig('outputs/average_sizes2.pdf')
    # plt.show()
    plt.close()


def average_box_plot(binding_type):
    """Draw box plots of the sizes listed in the raw output files.

    For every non-empty entry of ``formats`` one box is drawn per input file:
    templates, then bindings, then expansions. The second comma-separated
    field of each non-empty line is taken as the size.

    Args:
        binding_type: selects the bindings input and the output file:
            0 uses ``outputs/bindings_output.txt`` and saves
            ``outputs/box_sizes.pdf``; 2 uses both bindings files and saves
            ``outputs/all_box_sizes.pdf``; any other value uses
            ``outputs/bindings2_output.txt`` and saves
            ``outputs/box_sizes2.pdf``.
    """
    files = []
    labels = []
    for f in formats:
        if not f:
            continue
        #files.append('outputs/templates_output_{0}.txt'.format(f))
        #files.append('outputs/bindings_output_{0}.txt'.format(f))
        #files.append('outputs/expansions_output_{0}.txt'.format(f))
        files.append('outputs/templates_output.txt')
        labels.append('T')

        # Exactly one branch must run so that files and labels stay paired.
        if binding_type == 0:
            files.append('outputs/bindings_output.txt')
            labels.append('B')
        elif binding_type == 2:
            files.append('outputs/bindings_output.txt')
            files.append('outputs/bindings2_output.txt')
            labels.append('B')
            labels.append('{0}   B2'.format(f))
        else:
            files.append('outputs/bindings2_output.txt')
            labels.append('B')

        files.append('outputs/expansions_output.txt')
        labels.append('E')

    data_set = []
    for path in files:
        with open(path, 'r') as size_file:
            file_data = size_file.read().split('\n')
        data_set.append([float(d.split(',')[1]) for d in file_data if d != ''])

    plt.boxplot(data_set, sym='', )

    plt.title('Size of Templates, Bindings and \n their Expansion Normalised in {0}'.format(formats), y=1.08)
    if binding_type == 2:
        plt.xlabel('Provenance (Template (T), Binding (B), Binding2 (B2), and Expansion (E))')
    else:
        plt.xlabel('Provenance (Template (T), Binding (B), and Expansion (E))')
    plt.ylabel('Size in Bytes')

    # Boxes are drawn at positions 1..N, so there is one tick per label.
    r = list(range(1, len(labels) + 1))
    plt.xticks(r, labels, rotation='vertical')
    plt.tight_layout(h_pad=5, w_pad=10)
    if binding_type == 0:
        plt.savefig('outputs/box_sizes.pdf')
    elif binding_type == 2:
        plt.savefig('outputs/all_box_sizes.pdf')
    else:
        plt.savefig('outputs/box_sizes2.pdf')
    # plt.show()
    plt.close()


def generate_size_graphs():
    """Render the size graphs that are currently enabled.

    For binding types 0 and 1 the summed-size bar chart is produced for every
    non-empty entry of ``formats``; afterwards the combined box plot
    (binding type 2) is drawn once. The triples chart and the average graphs
    remain switched off.
    """
    # generate_average_triples()
    for binding_type in (0, 1):
        named_formats = [fmt for fmt in formats if len(fmt) > 0]
        for fmt in named_formats:
            prov_sum_aggregator_bytes(fmt, binding_type)
        # average_graph(binding_type)
        # average_box_plot(binding_type)
    # average_graph(2)
    average_box_plot(2)

# Script driver (runs whenever this module is executed or imported): build the
# statistics CSVs from outputs/info2.txt, then render the graphs that read them.
generate_stats('outputs/info2.txt')
generate_size_graphs()
