from pyspark import SparkConf, SparkContext

# Configure Spark with the "local" master and the application name "My App",
# then create the SparkContext used by every RDD in this script.
conf = SparkConf()
conf.setMaster("local")
conf.setAppName("My App")
sc = SparkContext(conf=conf)

# Teams: each line of timovi.csv is split on ";"; the first field is parsed as
# the integer team id and the second field is kept as the team name.
timoviFajl = sc.textFile("timovi.csv")
timovi = (
    timoviFajl
    .map(lambda linija: linija.split(";"))
    .map(lambda polja: (int(polja[0]), polja[1]))
)
# (idTima, imeTima), sample from the author's data:
# [(1, 'Crvena zvezda'), (2, 'Radnicki KG'), (3, 'Vojvodina'), (4, 'Partizan')]

# Matches: each line of utakmice.csv is split on ";" once; the RDDs below pick
# the integer fields they need. Field labels follow the tuple names used in
# this script: [0] idUtakmice, [1] idSezone, [2] idDomacina, [3] idGosta.
utakmiceFajl = sc.textFile("utakmice.csv")
utakmicePolja = utakmiceFajl.map(lambda linija: linija.split(";"))

# Two records per match line: one keyed by the home team, one by the away team.
utakmice = utakmicePolja.flatMap(
    lambda p: [(int(p[2]), int(p[0])), (int(p[3]), int(p[0]))]
)
# [(idDomacina, idUtakmice), (idGosta, idUtakmice)]
# sample: [(1, 1), (2, 1), (1, 2), (3, 2), (1, 3), (4, 3), (2, 4), (1, 4), ...]

domacinUtakmice = utakmicePolja.map(lambda p: (int(p[2]), int(p[0])))
# (idDomacina, idUtakmice)
# sample: [(1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (3, 6), (4, 7), (3, 8), (4, 9)]

gostUtakmice = utakmicePolja.map(lambda p: (int(p[3]), int(p[0])))
# (idGosta, idUtakmice)
# sample: [(2, 1), (3, 2), (4, 3), (1, 4), (4, 5), (2, 6), (2, 7), (4, 8), (3, 9)]

utakmicaSezona = utakmicePolja.map(lambda p: (int(p[0]), int(p[1])))
# (idUtakmice, idSezone)
# sample: [(1, 1), (2, 1), (3, 1), (4, 1), (5, 2), (6, 2), (7, 2), (8, 2), (9, 2)]

# Goals per team and season, split by whether the team on the goal line was
# the home side or the away side of the match the goal belongs to.
#
# Each line of golovi.csv is split on ";" and its first three fields are parsed
# as integers: [0] idGol, [1] idUtakmice, [2] idTima.
# NOTE(review): idTima is assumed to be the team that scored the goal -- confirm
# against the data definition.
goloviFajl = sc.textFile("golovi.csv")
goloviPolja = goloviFajl.map(lambda linija: linija.split(";"))

utakmicaGol = goloviPolja.map(lambda p: (int(p[1]), int(p[0])))
# (idUtakmice, idGol)

utakmicaTim = goloviPolja.map(lambda p: (int(p[1]), int(p[2]))).distinct()
# (idUtakmice, idTima): distinct (match, team) pairs that occur on goal lines.
# Not used by the pipeline below.

# One record per goal line, taking the team from that same line. Pairing
# utakmicaTim with utakmicaGol on the match id alone would attach every goal of
# a match to every team listed for that match.
utakmica_idTim_idGol = goloviPolja.map(
    lambda p: (int(p[1]), (int(p[2]), int(p[0])))
)
# (idUtakmica, (idTim, idGol))

# Attach the season of the match. This is an inner join, so goals whose match
# id has no entry in utakmicaSezona are dropped.
utakmica_idTim_idGol_idSezona = utakmica_idTim_idGol.join(utakmicaSezona)
# (idUtakmica, ((idTim, idGol), idSezona))

idTim_idGol_idSezona = utakmica_idTim_idGol_idSezona.map(
    lambda x: (x[1][0][0], (x[1][0][1], x[1][1]))
)
# (idTim, (idGol, idSezona))

# Key every goal by (team, match). Joining on the team id alone would repeat
# each goal once per home/away match of that team, whichever match the goal
# came from, and inflate the counts.
timUtakmica_golSezona = utakmica_idTim_idGol_idSezona.map(
    lambda x: ((x[1][0][0], x[0]), (x[1][0][1], x[1][1]))
)
# ((idTim, idUtakmica), (idGol, idSezona))


# --- Home side ---------------------------------------------------------------

# Keep only goals whose team is the home team of the goal's own match.
domacin_Gol_Sezona_Utakmica = (
    timUtakmica_golSezona
    .join(domacinUtakmice.map(lambda x: (x, x[1])))
    .map(lambda x: (x[0][0], x[1]))
)
# (idDomacin, ((idGol, idSezona), idUtakmica))

domacinGolSezona = domacin_Gol_Sezona_Utakmica.map(lambda x: (x[0], x[1][0]))
# (idDomacina, (idGol, idSezona))

# Count goals per (team, season); the goal id itself no longer matters.
domacinGol_Sezona = domacinGolSezona.map(lambda x: ((x[0], x[1][1]), 1))
# ((idDomacina, idSezona), 1)
domacinGol = domacinGol_Sezona.reduceByKey(lambda x, y: x + y)
# ((idDomacina, idSezona), brGolova)

domacin_brGolovaSezona = domacinGol.map(lambda x: (x[0][0], (x[1], x[0][1])))
# (idDomacina, (brGolova, idSezona)); team/season pairs without any home goal
# line do not appear.


# --- Away side: same steps, using gostUtakmice -------------------------------

gost_Gol_Sezona_Utakmica = (
    timUtakmica_golSezona
    .join(gostUtakmice.map(lambda x: (x, x[1])))
    .map(lambda x: (x[0][0], x[1]))
)
# (idGost, ((idGol, idSezona), idUtakmica))

gostGolSezona = gost_Gol_Sezona_Utakmica.map(lambda x: (x[0], x[1][0]))
# (idGosta, (idGol, idSezona))

gostGol_Sezona = gostGolSezona.map(lambda x: ((x[0], x[1][1]), 1))
# ((idGosta, idSezona), 1)
gostGol = gostGol_Sezona.reduceByKey(lambda x, y: x + y)
# ((idGosta, idSezona), brGolova)

gost_brGolovaSezona = gostGol.map(lambda x: (x[0][0], (x[1], x[0][1])))
# (idGosta, (brGolova, idSezona)); team/season pairs without any away goal
# line do not appear.
