# approach 1: deviation is analyzed for each registration point

module registrationPoints

include("./output.jl")

using Statistics, CSV, Dates, DataFrames, StatsBase
using .output

export analyzeStatisticalVariation, allDataQuantile, top1, top100

# Layout of the ZUGEREIGNIS_ISTZEIT / ZUGEREIGNIS_SOLLZEIT timestamp strings.
const TIMESTAMP_FORMAT = Dates.DateFormat("dd.mm.yyyy HH:MM")

# ZUGEREIGNIS_TYP codes evaluated at registration points marked "D" (odd position in the
# station list) and at those marked "A" (even position).
const DEPARTURE_EVENT_TYPES = (10, 40)
const ARRIVAL_EVENT_TYPES = (20, 50)

const NO_DATA_MESSAGE = "There is no data for this direction and this line."

"""
    analyzeStatisticalVariation(allPerfData, settings)

Return the quantile tables for both directions of the analyzed line, as computed by
`calculateQuantiles`.
"""
function analyzeStatisticalVariation(allPerfData, settings)
    quantileD1, quantileD2 = calculateQuantiles(allPerfData, settings)
    return quantileD1, quantileD2
end

"""
    calculateQuantiles(perfData, settings)

Prepare one station table per direction and fill it with the statistics selected by
`settings.timePeriod[1]`:

- `"no"`: quantiles over all data (`calculateQuantileForDirection`)
- `"match day"`: match days against other days (`calculateQuantileForGame`)
- `"rush hour"`: rush hour against the rest (`calculateQuantileForTimePeriod`)
- anything else: comparison of two day names (`calculateQuantileForDay`)

For every mode except `"no"` nothing is calculated when `settings.analyzedLine == "11"`;
the bare station tables are returned in that case.

# Throws
- `ArgumentError` if `settings.analyzedLine` is not contained in `settings.allLines`.
"""
function calculateQuantiles(perfData, settings)
    if settings.objectInFocus == "single line"
        lineNr = 1
    else
        lineNr = findfirst(==(settings.analyzedLine), settings.allLines)
        if lineNr === nothing
            throw(
                ArgumentError(
                    "analyzed line $(settings.analyzedLine) is not part of settings.allLines",
                ),
            )
        end
    end
    stationList = settings.stationLists[lineNr]

    # The keyword constructor copies the station vector, so later changes to the tables
    # never reach `settings.stationLists`.
    dataDirection1 =
        DataFrame(station = stationList, DepOrArr = fill("D", length(stationList)))
    dataDirection2 = DataFrame(
        station = reverse(stationList),
        DepOrArr = fill("D", length(stationList)),
    )

    # sorting the performance data by directions: direction 1 runs towards the last
    # station of the list, direction 2 towards the first one
    directionColumn = perfData[:, :ZUGEREIGNIS_RICHTUNG]
    perfDataDirection1 = perfData[directionColumn .== stationList[end], :]
    perfDataDirection2 = perfData[directionColumn .== stationList[1], :]

    period = settings.timePeriod[1]
    calculate = if period == "no"
        calculateQuantileForDirection
    elseif settings.analyzedLine == "11"
        nothing
    elseif period == "match day"
        calculateQuantileForGame
    elseif period == "rush hour"
        calculateQuantileForTimePeriod
    else
        calculateQuantileForDay
    end

    if calculate !== nothing
        dataDirection1 = calculate(dataDirection1, perfDataDirection1, settings)
        dataDirection2 = calculate(dataDirection2, perfDataDirection2, settings)
    end
    return dataDirection1, dataDirection2
end

# Deviation (actual minus scheduled time) of one event row in whole seconds.
function deviationSeconds(row)
    actual = Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, TIMESTAMP_FORMAT)
    scheduled = Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, TIMESTAMP_FORMAT)
    return Dates.value(convert(Dates.Second, actual - scheduled))
end

# Event type codes evaluated for the station at `position` of the station list.
function relevantEventTypes(position)
    return iseven(position) ? ARRIVAL_EVENT_TYPES : DEPARTURE_EVENT_TYPES
end

# Deviations of one station, sorted into `groupCount` groups. `classify(row)` returns the
# group index of a row, or 0 to ignore the row.
function collectStationDeviations(perfData, stationName, position, classify, groupCount)
    groups = [Int64[] for _ in 1:groupCount]
    eventTypes = relevantEventTypes(position)
    stationRows = perfData[perfData[:, :ZUGEREIGNIS_DS100] .== stationName, :]
    for row in eachrow(stationRows)
        group = classify(row)
        group == 0 && continue
        row.ZUGEREIGNIS_TYP in eventTypes || continue
        push!(groups[group], deviationSeconds(row))
    end
    return groups
end

# Per group, the deviation samples of every station in table order. Also marks every
# second station of `dataDirection` as arrival ("A") in place.
function collectGroupedDeviations!(dataDirection, perfData, classify, groupCount)
    perGroup = [Vector{Vector{Int64}}() for _ in 1:groupCount]
    for (position, station) in enumerate(eachrow(dataDirection))
        if iseven(position)
            station.DepOrArr = "A"
        end
        stationGroups = collectStationDeviations(
            perfData,
            station.station,
            position,
            classify,
            groupCount,
        )
        for (group, deviations) in enumerate(stationGroups)
            push!(perGroup[group], deviations)
        end
    end
    return perGroup
end

# A sample without observations is represented by a single deviation of 0, so that
# quantile and mean stay defined for stations without matching events.
paddedSample(deviations) = isempty(deviations) ? Int64[0] : deviations

# Add the quantile/average columns (and optionally the train run counts) of a two-group
# comparison. The first label belongs to the first group, every further label to the second.
function addComparisonColumns!(dataDirection, groups, labels, settings; withTrainRuns = false)
    quantileText = settings.quantile[1]
    probability = parse(Float64, quantileText) / 100
    for (labelNr, label) in enumerate(labels)
        samples = groups[labelNr == 1 ? 1 : 2]
        dataDirection[!, "quantile" * quantileText * "_" * label] =
            Float64[quantile(paddedSample(sample), probability) for sample in samples]
        dataDirection[!, "average_" * label] =
            Float64[mean(paddedSample(sample)) for sample in samples]
        if withTrainRuns
            # counted before padding: a station with one real run reports 1, not 0
            dataDirection[!, "train runs " * label] = Int[length(sample) for sample in samples]
        end
    end
    return dataDirection
end

"""
    calculateQuantileForDirection(dataDirection1, perfDataDirection1, settings)

Calculate every quantile listed in `settings.quantile` (percent values given as strings)
for each registration point of one direction.

Stations without any data are removed first (see `deleteEmptyStations`). The returned
table has the columns `quantile1`, `quantile2`, ... plus `AverageDelay` (mean deviation in
seconds) and `TrainRuns` (number of evaluated events). A station that has data but no
event of the relevant type gets quantile and average 0 and zero train runs.
"""
function calculateQuantileForDirection(dataDirection1, perfDataDirection1, settings)
    # registration points with no data are being deleted
    dataDirection1 = deleteEmptyStations(dataDirection1, perfDataDirection1)

    samples = collectGroupedDeviations!(dataDirection1, perfDataDirection1, _ -> 1, 1)[1]

    for (quantileNr, quantileText) in enumerate(settings.quantile)
        probability = parse(Float64, quantileText) / 100
        dataDirection1[!, "quantile" * string(quantileNr)] =
            Float64[quantile(paddedSample(sample), probability) for sample in samples]
    end
    dataDirection1[!, :AverageDelay] =
        Float64[mean(paddedSample(sample)) for sample in samples]
    dataDirection1[!, :TrainRuns] = Int[length(sample) for sample in samples]
    return dataDirection1
end

"""
    calculateQuantileForDay(dataDirection1, perfDataDirection1, settings)

Compare two days for each registration point of one direction. Rows whose `dayname`
equals `settings.timePeriod[1]` form the first sample, rows matching
`settings.timePeriod[2]` the second one; all other rows are ignored.

For every entry `day` of `settings.timePeriod` the columns `quantile<q>_<day>`,
`average_<day>` and `train runs <day>` are added, where `<q>` is `settings.quantile[1]`.
Samples without observations yield 0 for quantile, average and train runs.

Prints a notice and returns an empty `DataFrame` if `perfDataDirection1` has no rows.
"""
function calculateQuantileForDay(dataDirection1, perfDataDirection1, settings)
    if size(perfDataDirection1, 1) == 0
        println(NO_DATA_MESSAGE)
        return DataFrame()
    end

    classify = function (row)
        row.dayname == settings.timePeriod[1] && return 1
        row.dayname == settings.timePeriod[2] && return 2
        return 0
    end
    groups = collectGroupedDeviations!(dataDirection1, perfDataDirection1, classify, 2)
    return addComparisonColumns!(
        dataDirection1,
        groups,
        settings.timePeriod,
        settings;
        withTrainRuns = true,
    )
end

"""
    calculateQuantileForGame(dataDirection1, perfDataDirection1, settings)

Compare match days with "normal" days for each registration point of one direction. Rows
whose `day` is contained in `settings.gamedays` form the `yes` sample, all other rows the
`no` sample.

Adds the columns `quantile<q>_yes`, `average_yes`, `quantile<q>_no` and `average_no`,
where `<q>` is `settings.quantile[1]`. Samples without observations yield 0.

Prints a notice and returns an empty `DataFrame` if `perfDataDirection1` has no rows.
"""
function calculateQuantileForGame(dataDirection1, perfDataDirection1, settings)
    if size(perfDataDirection1, 1) == 0
        println(NO_DATA_MESSAGE)
        return DataFrame()
    end

    classify = row -> row.day in settings.gamedays ? 1 : 2
    groups = collectGroupedDeviations!(dataDirection1, perfDataDirection1, classify, 2)
    return addComparisonColumns!(dataDirection1, groups, ["yes", "no"], settings)
end

"""
    calculateQuantileForTimePeriod(dataDirection1, perfDataDirection1, settings)

Compare rush hour with the remaining time for each registration point of one direction.
Rows with `rushHour == "yes"` form the `yes` sample, all other rows the `no` sample.

Adds the columns `quantile<q>_yes`, `average_yes`, `quantile<q>_no` and `average_no`,
where `<q>` is `settings.quantile[1]`. Samples without observations yield 0.

Prints a notice and returns an empty `DataFrame` if `perfDataDirection1` has no rows.
"""
function calculateQuantileForTimePeriod(dataDirection1, perfDataDirection1, settings)
    if size(perfDataDirection1, 1) == 0
        println(NO_DATA_MESSAGE)
        return DataFrame()
    end

    classify = row -> row.rushHour == "yes" ? 1 : 2
    groups = collectGroupedDeviations!(dataDirection1, perfDataDirection1, classify, 2)
    return addComparisonColumns!(dataDirection1, groups, ["yes", "no"], settings)
end

"""
    deleteEmptyStations(dataDirection1, perfDataDirection1)

Return the station table without the stations that have no row in `perfDataDirection1`.

If an odd number of stations was removed, the first remaining station is dropped as well
and a fresh table with every `DepOrArr` reset to `"D"` is returned. If nothing was removed,
`dataDirection1` itself is returned.
"""
function deleteEmptyStations(dataDirection1, perfDataDirection1)
    eventStations = perfDataDirection1[!, :ZUGEREIGNIS_DS100]
    hasData = Bool[any(isequal(name), eventStations) for name in dataDirection1.station]
    all(hasData) && return dataDirection1

    remaining = dataDirection1[hasData, :]
    removedCount = count(!, hasData)
    # NOTE(review): dropping the first station after an odd number of removals presumably
    # keeps the departure/arrival alternation by list position intact - confirm.
    if isodd(removedCount) && size(remaining, 1) > 0
        stationList = remaining.station[2:end]
        return DataFrame(station = stationList, DepOrArr = fill("D", length(stationList)))
    end
    return remaining
end

"""
    top100(df1, settings; firstDay, finalDay, topCount = 25)

Create a "black list" for each week between `firstDay` and the week containing `finalDay`
and count how often each train number and each station/train number combination shows up
in the weekly lists.

Only departures (event types 10 and 40) are analyzed. For every week the events are
sorted by deviation in minutes (largest first) and saved with `saveOutput`; the first
`topCount` rows contribute their `"<station>,<train number>"` key and the first `topCount`
distinct train numbers are recorded. Finally both count tables are saved with
`saveOutput`.

# Keywords
- `firstDay`: first day of the first week (default 01.09.2017).
- `finalDay`: the analysis stops after the week containing this day (default 11.10.2017).
- `topCount`: number of entries taken from every weekly list.

# Throws
- `ArgumentError` if `finalDay` lies before `firstDay`.
"""
function top100(
    df1,
    settings;
    firstDay = Dates.Date(2017, 9, 1),
    finalDay = Dates.Date(2017, 10, 11),
    topCount = 25,
)
    finalDay >= firstDay ||
        throw(ArgumentError("finalDay ($finalDay) must not lie before firstDay ($firstDay)"))

    # only departures are being analyzed
    perfData = df1[df1[:, :ZUGEREIGNIS_TYP] .== 40, :]
    append!(perfData, df1[df1[:, :ZUGEREIGNIS_TYP] .== 10, :])

    # Element types stay `Any` so that `countmap` hands the same container types to
    # `saveOutput` as before.
    topAll = Any[]
    topNum = Any[]

    weekStart = firstDay
    while true
        weekEnd = weekStart + Dates.Week(1) - Dates.Day(1)
        days = weekStart:Dates.Day(1):weekEnd
        week = filter(
            row -> Dates.Date(row.ZUGEREIGNIS_SOLLZEIT, TIMESTAMP_FORMAT) in days,
            perfData,
        )
        week[!, :deviation] = Float64[deviationSeconds(row) / 60 for row in eachrow(week)]

        # for each week the data is being sorted by deviation
        sort!(week, :deviation, rev = true)

        rankedRows = min(topCount, size(week, 1))
        numbers = unique(week.ZUGEREIGNIS_ZUGNUMMER)
        # a week can hold fewer distinct train numbers than ranked rows
        append!(topNum, numbers[1:min(rankedRows, length(numbers))])
        for row in Iterators.take(eachrow(week), rankedRows)
            push!(topAll, row.ZUGEREIGNIS_DS100 * "," * string(row.ZUGEREIGNIS_ZUGNUMMER))
        end
        saveOutput(week, settings)

        weekEnd >= finalDay && break
        weekStart = weekEnd + Dates.Day(1)
    end

    df = DataFrame(countmap(topAll))
    df3 = countmap(topNum)
    saveOutput(df, settings)
    sleep(1)
    return saveOutput(df3, settings)
end

"""
    top1(df, settings)

Analyze a single train number, given as a string in `settings.mode`. Prints median and
average deviation in seconds as well as the number of train runs, based on the departure
events (types 10 and 40) of that train. If the train has no departure events only a
notice is printed.
"""
function top1(df, settings)
    trainNumber = parse(Int, settings.mode)
    perfData = df[df[:, :ZUGEREIGNIS_ZUGNUMMER] .== trainNumber, :]
    departures = perfData[perfData[:, :ZUGEREIGNIS_TYP] .== 40, :]
    append!(departures, perfData[perfData[:, :ZUGEREIGNIS_TYP] .== 10, :])

    deviations = Int64[deviationSeconds(row) for row in eachrow(departures)]
    if isempty(deviations)
        # median/mean are undefined for an empty sample
        println("No departure data for train number " * string(trainNumber))
        return nothing
    end

    println("Median: " * string(median(deviations)))
    println("Average :" * string(mean(deviations)))
    println("Train Runs: " * string(length(deviations)))
    return nothing
end

end