from pyspark import SparkConf, SparkContext
import csv

# Configure a Spark application named "kolokvijum" with master "local",
# then start the context that the rest of the script uses.
conf = SparkConf()
conf.setMaster("local")
conf.setAppName("kolokvijum")
sc = SparkContext(conf=conf)

def readCSV(path):
    """Load a semicolon-delimited text file as an RDD of parsed rows.

    Args:
        path: Location of the file, passed straight to ``sc.textFile``.

    Returns:
        An RDD whose elements are lists of string fields, one list per
        CSV record.
    """
    # csv.reader consumes an iterator of lines, so it is applied once per
    # partition rather than once per line.
    return sc.textFile(path).mapPartitions(
        lambda lines: csv.reader(lines, delimiter=';')
    )

# Raw inputs: goals ("golovi") and matches ("utakmice"), each an RDD of rows.
golovi = readCSV("./golovi.csv")
utakmice = readCSV("./utakmice.csv")

# Re-key every goal row as ((match_id, team_id), 0) so it can be joined
# against the matches; the 0 is only a placeholder value.
golovi_t = golovi.map(lambda goal_row: ((goal_row[1], goal_row[2]), 0))

# Total goals per (home team, season).
#   MAP 1  -> ((match_id, home_team_id), season_id)
#   JOIN   -> ((match_id, home_team_id), (season_id, 0)), one output row per
#             matching row of golovi_t; matches without a goal row drop out
#   MAP 2  -> ((home_team_id, season_id), 1)
#   REDUCE -> ((home_team_id, season_id), total)
dom_ukupno = (
    utakmice.map(lambda x: ((x[0], x[2]), x[1]))
    .join(golovi_t)
    # Key by the team id (x[0][1]), not the match id (x[0][0]); otherwise the
    # sum would be per match instead of per team and season.
    .map(lambda x: ((x[0][1], x[1][0]), 1))
    .reduceByKey(lambda x, y: x + y)
)

# Total goals per (guest team, season); same pipeline as the home-team total,
# but keyed on column 3 of utakmice (presumably the guest team id -- confirm
# against the CSV layout).
#   MAP 1  -> ((match_id, guest_team_id), season_id)
#   JOIN   -> ((match_id, guest_team_id), (season_id, 0))
#   MAP 2  -> ((guest_team_id, season_id), 1)
#   REDUCE -> ((guest_team_id, season_id), total)
gost_ukupno = (
    utakmice.map(lambda x: ((x[0], x[3]), x[1]))
    .join(golovi_t)
    # Key by the team id (x[0][1]), not the match id (x[0][0]); otherwise the
    # sum would be per match instead of per team and season.
    .map(lambda x: ((x[0][1], x[1][0]), 1))
    .reduceByKey(lambda x, y: x + y)
)

# For every team, find the season with the largest (guest goals - home goals).
#   JOIN   -> ((team_id, season_id), (guest_goals, home_goals)); either side
#             may be None because this is a full outer join
#   MAP    -> (team_id, (season_id, goal_difference))
#   REDUCE -> (team_id, (season_id, max_goal_difference))
razlika = (
    gost_ukupno.fullOuterJoin(dom_ukupno)
    # A side missing from the outer join means no goals were counted there.
    .mapValues(lambda v: (v[0] if v[0] is not None else 0,
                          v[1] if v[1] is not None else 0))
    # Re-key by team alone so that a team's seasons meet in the reduce step.
    .map(lambda kv: (kv[0][0], (kv[0][1], kv[1][0] - kv[1][1])))
    # Return the whole (season, difference) pair so the result has the same
    # shape as the inputs, as reduceByKey requires.
    .reduceByKey(lambda a, b: a if a[1] > b[1] else b)
)

# RDDs have no createTextFile method; saveAsTextFile is the writer. Spark
# creates "najSezona.csv" as an output directory containing part files.
razlika.saveAsTextFile("najSezona.csv")