module performance
# main module
# functions are being called
# data is being sorted and organized

include("./types.jl")
include("./output.jl")
include("./plotting.jl")
include("./betweenRegistrationPoints.jl")
include("./registrationPoints.jl")

import YAML
using CSV, Dates, DataFrames, Statistics, StatsBase
using .types
using .output
using .plotting
using .betweenRegistrationPoints
using .registrationPoints

export startAnalyzing

function startAnalyzing(filePathSettings)
    settings = readSettings(filePathSettings)
    memory = Memory()

    # if needed, the match days are collected
    if settings.timePeriod[1] == "match day"
        df = DataFrame(CSV.File(settings.estimatedTimesPath))
        df_station = df[df[:, :ZUGEREIGNIS_LINIE].==11, :]
        settings.gamedays = df_station.SERVICE_START_ZEIT
        unique!(settings.gamedays)
    end

    # performance data is being sorted
    perfData = @time readPerfData(settings)

    """
    The following conditional evaluation calls the different functions.
    Possible modes: "statistical variation", "black list", "train number".
    For "statistical variation" two different approaches are being used.
    For "black list" or a single "train number" all selected line numbers are
    analyzed to get detailed information about the deviation for each
    registration point in one DataFrame. The function "top100" creates
    "black lists" for each week, the function "top1" analyzes a single train
    number for the average deviation and the median.
    """
    if settings.mode != "statistical variation" # for "black list" or single "train number"
        allPerfData = DataFrame()
        currentLine = 1
        for line in settings.allLines
            settings.analyzedLine = line
            perfDataLine = @time createFiles(perfData, settings)
            settings.commonStations = intersect(
                settings.commonStations,
                settings.stationLists[currentLine],
            )
            if currentLine == 1
                allPerfData = perfDataLine
            else
                append!(allPerfData, perfDataLine)
            end
            currentLine += 1
            println("")
        end
        if settings.mode == "black list"
            @time top100(allPerfData, settings)
        else
            @time top1(allPerfData, settings)
        end
    else # settings.mode == "statistical variation"
        if settings.approach == "registration points"
            # deviation at each registration point
            settings.commonStations = collect(keys(settings.stationDict))
            if settings.objectInFocus == "single line"
                allPerfData = @time createFiles(perfData, settings)
                quantileD1, quantileD2 =
                    @time analyzeStatisticalVariation(allPerfData, settings)
                @time plotEverything(quantileD1, quantileD2, settings, memory)
            elseif settings.objectInFocus == "all lines"
                for line in settings.allLines
                    settings.analyzedLine = line
                    linePerfData = @time createFiles(perfData, settings)
                    q1, q2 = @time analyzeStatisticalVariation(
                        linePerfData,
                        settings,
                    )
                    @time plotEverything(q1, q2, settings, memory)
                    println("")
                end
            end
        elseif settings.approach == "between registration points"
            settings.commonStations = collect(keys(settings.stationDict))
            if settings.objectInFocus == "single line"
                allPerfData = @time createFiles(perfData, settings)
                plotData1, plotData2 = @time getDifferences(allPerfData, settings)
                @time plotEverything(plotData1, plotData2, settings, memory)
            elseif settings.objectInFocus == "all lines"
                for line in settings.allLines
                    settings.analyzedLine = line
                    allPerfData = createFiles(perfData, settings)
                    plotData1, plotData2 = @time getDifferences(allPerfData, settings)
                    @time plotEverything(plotData1, plotData2, settings, memory)
                    println("")
                end
            end
        else
            error("ERROR: No approach has been selected. Please do so.")
        end
    end
end
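# Usage sketch (kept as a comment so nothing runs when this file is included;
# the file name and the settings path below are hypothetical examples):
#
#     include("performance.jl")
#     using .performance
#     startAnalyzing("./settings.yaml")
#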
"""
The function sorts the performance data and deletes duplicates.
If only specific days are needed, the other days are deleted or marked.
"""
function readPerfData(settings)
    perfData = DataFrame(CSV.File(settings.realTimeDataPath; header = 1, delim = ";"))
    sizePerfData = size(perfData, 1)
    println(
        "The file ",
        settings.realTimeDataPath,
        " has ",
        sizePerfData,
        " rows.",
    )

    if settings.timePeriod[1] != "no"
        perfData = selectSpecificDays(perfData, settings)
    end

    # duplicates are being deleted
    select!(perfData, Not(:QUELLE_SENDER))
    select!(perfData, Not(:EINGANGSZEIT))
    perfData[!, :single] = nonunique(perfData)
    perfData = perfData[perfData[:, :single].==false, :]
    select!(perfData, Not(:single))

    # the DS100 code "TS" is renamed to "TS T"
    for row in eachrow(perfData)
        if row.ZUGEREIGNIS_DS100 == "TS"
            row.ZUGEREIGNIS_DS100 = "TS T"
        end
    end

    println(
        "Performance data has been sorted and saved. ",
        sizePerfData - size(perfData, 1),
        " row(s) has/have been deleted.",
    )
    return perfData
end

function selectSpecificDays(df1, settings)
    if settings.timePeriod[1] == "match day" # days with a match are being marked
        gamedays = Any[]
        day = Any[]
        game = Any[]
        for gameday in settings.gamedays
            push!(gamedays, Dates.Date(Dates.DateTime(gameday, "dd.mm.yyyy HH:MM")))
        end
        unique!(gamedays)
        settings.gamedays = copy(gamedays)
        for row in eachrow(df1)
            currentDay = Dates.Date(
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
            )
            push!(day, currentDay)
            if currentDay in settings.gamedays
                push!(game, "yes")
            else
                push!(game, "no")
            end
        end
        df1[!, :day] = day
        df1[!, :game] = game
        df_new = copy(df1)
        #df_day = filter(row -> row[:day] in settings.gamedays, df1)
    elseif settings.timePeriod[1] == "rush hour" # rush hour or not
        rushHour = Any[]
        startM = parse(Float64, settings.timePeriod[2])
        endM = parse(Float64, settings.timePeriod[3])
        startE = parse(Float64, settings.timePeriod[4])
        endE = parse(Float64, settings.timePeriod[5])
        for row in eachrow(df1)
            currentH = Dates.Hour(
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
            )
            currentM = Dates.Minute(
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
            )
            # encode the time of day as hour + minutes/100, e.g. 07:45 -> 7.45
            current = currentH.value + currentM.value / 100
            if (current >= startM && current <= endM) ||
               (current >= startE && current <= endE)
                push!(rushHour, "yes")
            else
                push!(rushHour, "no")
            end
        end
        df1[!, :rushHour] = rushHour
        df_new = copy(df1)
        saveOutput(df_new, settings)
    else # comparison of two weekdays
        df1[!, :dayname] = fill("day undefined", size(df1, 1))
        for row in eachrow(df1)
            if Dates.dayname(
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
            ) == settings.timePeriod[1]
                row.dayname = settings.timePeriod[1] # day 1
            elseif Dates.dayname(
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
            ) == settings.timePeriod[2]
                row.dayname = settings.timePeriod[2] # day 2
            end
        end
        df_new = df1[df1[:, :dayname].!="day undefined", :]
    end
    return df_new
end
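# Worked example (as a comment) of the hour-plus-minutes encoding used in the
# "rush hour" branch above. The window bounds 6.30 and 9.00 are illustrative
# values for settings.timePeriod[2:3], not defaults of this module:
#
#     using Dates
#     t = Dates.DateTime("18.03.2019 07:45", "dd.mm.yyyy HH:MM")
#     current = Dates.hour(t) + Dates.minute(t) / 100   # 7.45
#     6.30 <= current <= 9.00                           # true -> "yes"
#
# Because the minutes never reach 100, comparing these values against bounds
# written in the same HH.MM style preserves chronological order.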
"""
The settings are read from the yaml file and a dictionary for the station names is created.
"""
function readSettings(filePathSettings)
    data = YAML.load(open(filePathSettings))
    setting = Settings()

    if haskey(data["settings"], "outputFilePath")
        setting.outputFilePath = data["settings"]["outputFilePath"]
        delete!(data["settings"], "outputFilePath")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword outputFilePath is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "objectInFocus")
        setting.objectInFocus = data["settings"]["objectInFocus"]
        delete!(data["settings"], "objectInFocus")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword objectInFocus is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "timePeriod")
        setting.timePeriod = data["settings"]["timePeriod"]
        delete!(data["settings"], "timePeriod")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword timePeriod is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "analyzedLine")
        setting.analyzedLine = data["settings"]["analyzedLine"]
        delete!(data["settings"], "analyzedLine")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword analyzedLine is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "estimatedTimesPath")
        setting.estimatedTimesPath = data["settings"]["estimatedTimesPath"]
        delete!(data["settings"], "estimatedTimesPath")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword estimatedTimesPath is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "realTimeDataPath")
        setting.realTimeDataPath = data["settings"]["realTimeDataPath"]
        delete!(data["settings"], "realTimeDataPath")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword realTimeDataPath is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "stationsListPath")
        setting.stationsListPath = data["settings"]["stationsListPath"]
        delete!(data["settings"], "stationsListPath")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword stationsListPath is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "mode")
        setting.mode = data["settings"]["mode"]
        delete!(data["settings"], "mode")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword mode is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "allLines")
        setting.allLines = data["settings"]["allLines"]
        delete!(data["settings"], "allLines")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword allLines is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "quantile")
        setting.quantile = data["settings"]["quantile"]
        delete!(data["settings"], "quantile")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword quantile is missing. It has to be added.",
        )
    end
    if haskey(data["settings"], "approach")
        setting.approach = data["settings"]["approach"]
        delete!(data["settings"], "approach")
    else
        error(
            "ERROR at reading the settings yaml file: The keyword approach is missing. It has to be added.",
        )
    end

    # station dict for DS100 => name of station
    stationDict = createStationDict(readlines(open(setting.stationsListPath)))
    stationDict["TFL"] = "Stuttgart Flughafen Messe"
    stationDict["TBO"] = "Boeblingen"
    setting.stationDict = stationDict

    return setting
end

function createStationDict(stationLines)
    dic = Dict()
    for x in stationLines
        substring = split(x, ";")
        push!(dic, substring[2] => substring[3])
    end
    return dic
end
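# Sketch of the station list format createStationDict expects: one line per
# station, ";"-separated, with the DS100 code in the second field and the
# station name in the third (the lines below are illustrative, not taken from
# the real file at stationsListPath):
#
#     lines = ["1;TS;Stuttgart Hbf", "2;TBO;Boeblingen"]
#     createStationDict(lines)
#     # -> Dict("TS" => "Stuttgart Hbf", "TBO" => "Boeblingen")
#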
""" function createFiles(perfData, settings) trainNumber = readLineData(settings) perfData = editFile(settings, perfData, trainNumber) return perfData end function readLineData(settings) df = DataFrame(CSV.File(settings.estimatedTimesPath)) df1 = df[df[:, :ZUGEREIGNIS_LINIE].==parse(Int, settings.analyzedLine), :] trainNumber = unique(df1.ZUGEREIGNIS_ZUGNUMMER) # sort the data in train sets df1 = sort!(df1, [:SERVICE_ID, :SERVICE_START_ZEIT], rev = (false, true)) #row count for a better organisation df1[!, :rownumber] = axes(df1, 1) maxHALT_NR = maximum(df1.SERVICE_HALTNR) newTrains = findall(x -> x == 1, df1.SERVICE_HALTNR) endOfMaxStopsTrains = findall(x -> x == maxHALT_NR, df1.SERVICE_HALTNR) endOfMaxStopsTrains = filter!(x -> x >= newTrains[1], endOfMaxStopsTrains) i = 0 for x in newTrains if x == endOfMaxStopsTrains[1] - maxHALT_NR + 1 i += 1 break else i += 1 end end # station lists for both directions are being created i = newTrains[i] stationsList = Any[] while df1.SERVICE_HALTNR[i] != maxHALT_NR push!(stationsList, df1.ZUGEREIGNIS_DS100[i]) i += 1 end push!(stationsList, df1.ZUGEREIGNIS_DS100[i]) #saving the stationList in settings push!(settings.stationLists, stationsList) stationsListOneWay = unique(stationsList) stationsListOtherWay = reverse(stationsList) println( "Line ", settings.analyzedLine, " is connecting ", settings.stationDict[stationsListOneWay[1]], " and ", settings.stationDict[stationsListOneWay[size(stationsListOneWay, 1)]], ) return trainNumber end function editFile(settings, perfData, trainNumber) perfData = filter(row -> row[:ZUGEREIGNIS_ZUGNUMMER] in trainNumber, perfData) if settings.objectInFocus == "single line" lineNr = 1 else lineNr = findall(x -> x == settings.analyzedLine, settings.allLines) lineNr = lineNr[1] end stationList = settings.stationLists[lineNr] directionE = "" # direction of trains with even train numbers directionU = "" # direction of trains with uneven train numbers direction = Any[] perfData[!, :rownumber] = axes(perfData, 1) for row in eachrow(perfData) if row.ZUGEREIGNIS_TYP == 10 && row.ZUGEREIGNIS_DS100 == stationList[1] if iseven(row.ZUGEREIGNIS_ZUGNUMMER) directionE = stationList[length(stationList)] directionU = stationList[1] else directionU = stationList[length(stationList)] directionE = stationList[1] end break end end for row in eachrow(perfData) if iseven(row.ZUGEREIGNIS_ZUGNUMMER) push!(direction, directionE) else push!(direction, directionU) end end perfData[!, :ZUGEREIGNIS_RICHTUNG] = direction perfData = sort!( perfData, [:SERVICE_ID, :ZUGEREIGNIS_SOLLZEIT], rev = (true, false), ) perfData[!, :ZUGEREIGNIS_LINIE] = fill(settings.analyzedLine, size(perfData, 1)) println( "Performance Data for line " * settings.analyzedLine * " has been modified.", ) return perfData end end