from pyspark import SparkConf, SparkContext
import csv

def loadDD(filename):
    """Load a semicolon-delimited text file as an RDD of parsed rows.

    The file is read through the module-level SparkContext ``sc``, which
    must already exist when this function is called.

    Args:
        filename: Path handed to ``sc.textFile``.

    Returns:
        The RDD produced by running ``csv.reader`` (``delimiter=";"``) over
        the lines of each partition; every element is one parsed row.
    """
    raw_lines = sc.textFile(filename)
    # One csv.reader per partition: it consumes that partition's line
    # iterator and yields the split fields of each line.
    return raw_lines.mapPartitions(
        lambda partition_lines: csv.reader(partition_lines, delimiter=";")
    )

def _print_rdd(rdd):
    """Collect ``rdd`` on the driver, print its rows, then a separator line."""
    print(rdd.collect())
    print('------------------------------------------------------------')


# Driver script: for every team, find the season in which its metric
# (goals counted as home team minus goals counted as guest team) is highest.
#
# Identifier glossary (names kept from the original comments):
#   idUtakmice = match id, idGola = goal id, idTima = team id,
#   idSezone = season id, idDomacina = home team id, idGosta = guest team id.
conf = SparkConf().setMaster("local").setAppName("My App")
sc = SparkContext(conf = conf)
try:
    print('emili')

    # (idUtakmice, (idGola, idTima)) -- built from CSV columns 1, 0 and 2.
    df_golovi = loadDD('Ulaz/golovi.csv').map(lambda x: (int(x[1]), (int(x[0]), int(x[2]))))
    # (idUtakmice, (idSezone, idDomacina, idGosta))
    df_utakmice = loadDD('Ulaz/utakmice.csv').map(lambda x: (int(x[0]), (int(x[1]), int(x[2]), int(x[3]))))
    # (idTima, second CSV column) -- presumably the team name; confirm against the file.
    df_timovi = loadDD('Ulaz/timovi.csv').map(lambda x: (int(x[0]), x[1]))
    # (first column as int, second column) -- presumably (idSezone, season name);
    # only referenced by the disabled join at the end of the script.
    df_sezona = loadDD('Ulaz/sezona.csv').map(lambda x: (int(x[0]), x[1]))
    _print_rdd(df_golovi)
    _print_rdd(df_utakmice)

    # (idUtakmice, ((idSezone, idDomacina, idGosta), (idGola, idTima)))
    # Cached because this RDD is collected once and mapped twice below;
    # without it every use would re-read both files and redo the join.
    svi_golovi_po_utakmicama = df_utakmice.join(df_golovi).cache()
    _print_rdd(svi_golovi_po_utakmicama)

    # ((idSezone, idTima), (idDomacina, idGosta)) -- printed for inspection only.
    timovi_i_sezone = svi_golovi_po_utakmicama.map(lambda x: ((x[1][0][0], x[1][1][1]), (x[1][0][1], x[1][0][2])))
    _print_rdd(timovi_i_sezone)

    # One row per goal:
    # ((idSezone, idTima), (idDomacina, idGosta, homeFlag, guestFlag))
    # homeFlag is 1 when the goal's idTima equals idDomacina, guestFlag is 1
    # when it equals idGosta; otherwise 0.
    timovi_i_sezone_sa_brojem_golova_po_domacinima_i_gostima = svi_golovi_po_utakmicama.map(
        lambda x: (
            (x[1][0][0], x[1][1][1]),
            (
                x[1][0][1],
                x[1][0][2],
                1 if x[1][1][1] == x[1][0][1] else 0,
                1 if x[1][1][1] == x[1][0][2] else 0,
            ),
        )
    )
    _print_rdd(timovi_i_sezone_sa_brojem_golova_po_domacinima_i_gostima)

    # Only the two flags are needed for the per-(season, team) totals:
    # ((idSezone, idTima), (homeFlag, guestFlag))
    scores_before_reduce = timovi_i_sezone_sa_brojem_golova_po_domacinima_i_gostima.map(
        lambda x: ((x[0][0], x[0][1]), (x[1][2], x[1][3]))
    )
    _print_rdd(scores_before_reduce)

    # Sum the flags: ((idSezone, idTima), (homeCount, guestCount))
    scores = scores_before_reduce.reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1]))
    _print_rdd(scores)

    # Re-key by team: (idTima, (homeCount - guestCount, idSezone))
    # NOTE(review): the original comments described guestCount as goals
    # conceded and homeCount as goals scored, but as coded both counts belong
    # to the team recorded on the goal row (the key) -- confirm which metric
    # is intended.
    scores_final = scores.map(lambda x: (x[0][1], (x[1][0] - x[1][1], x[0][0])))
    _print_rdd(scores_final)

    # For each team keep the (metric, idSezone) pair with the largest metric.
    # On equal metrics the right-hand operand is kept, so the season that
    # wins a tie depends on the order in which the pairs are reduced.
    best_teams = scores_final.reduceByKey(lambda x, y: x if x[0] > y[0] else y)
    _print_rdd(best_teams)

    # (idTima, ((metric, idSezone), value from df_timovi))
    best_teams = best_teams.join(df_timovi)
    _print_rdd(best_teams)

    # (idSezone, metric, value from df_timovi)
    best_teams = best_teams.map(lambda x: (x[1][0][1], x[1][0][0], x[1][1]))
    _print_rdd(best_teams)

    # Disabled season lookup, kept from the original script.
    # NOTE(review): best_teams rows are 3-tuples at this point, so they would
    # presumably need re-keying into (idSezone, value) pairs before an RDD
    # join -- confirm before enabling.
    # best_teams = best_teams.join(df_sezona)
    # print(best_teams.collect())
    # print('------------------------------------------------------------')
finally:
    # Always release the context, even when a stage above fails.
    sc.stop()









