499 lines
16 KiB
Julia
499 lines
16 KiB
Julia
module performance
|
|
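# Main module of the performance analysis.
# It reads the settings and the performance data, sorts and organizes the data,
# and calls the analysis and plotting functions of the included submodules.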
|
|
|
|
include("./types.jl")
|
|
include("./output.jl")
|
|
include("./plotting.jl")
|
|
include("./betweenRegistrationPoints.jl")
|
|
include("./registrationPoints.jl")
|
|
|
|
import YAML
|
|
|
|
using CSV, Dates, DataFrames, Statistics, StatsBase
|
|
using .types
|
|
using .output
|
|
using .plotting
|
|
using .betweenRegistrationPoints
|
|
using .registrationPoints
|
|
|
|
export startAnalyzing
|
|
|
|
"""
    startAnalyzing(filePathSettings)

Entry point of the analysis. Read the settings from `filePathSettings`, load the
performance data with `readPerfData` and branch on `settings.mode`.

Possible modes: "statistical variation", "black list", "train number".

- Any mode other than "statistical variation": every line in `settings.allLines` is
  prepared with `createFiles`, the per-line results are collected in one `DataFrame`,
  and `top100` (mode "black list") or `top1` (every other mode) is called on it.
  According to the original notes, `top100` creates "black lists" for each week and
  `top1` analyzes a single train number for the average deviation and the median.
- "statistical variation": `settings.approach` selects between
  `analyzeStatisticalVariation` ("registration points") and `getDifferences`
  ("between registration points"); `settings.objectInFocus` selects between one run
  ("single line") and one run per entry of `settings.allLines` ("all lines"). Each run
  ends with `plotEverything`.

An error is raised when `settings.approach` matches neither approach. Any other value
of `settings.objectInFocus` results in no analysis being run.
"""
function startAnalyzing(filePathSettings)
    settings = readSettings(filePathSettings)
    # NOTE(review): Julia >= 1.11 also exports a Base type named `Memory`; confirm that
    # the project type is the one resolved here.
    memory = Memory()

    # if needed the match days are collected (service start times of line 11)
    if settings.timePeriod[1] == "match day"
        estimatedTimes = DataFrame(CSV.File(settings.estimatedTimesPath))
        stationRows = estimatedTimes[estimatedTimes[:, :ZUGEREIGNIS_LINIE] .== 11, :]
        settings.gamedays = stationRows.SERVICE_START_ZEIT
        unique!(settings.gamedays)
    end

    # performance data is being sorted
    perfData = @time readPerfData(settings)

    if settings.mode != "statistical variation"
        # "black list" or single "train number": collect the data of all lines
        allPerfData = DataFrame()
        for (lineIndex, line) in enumerate(settings.allLines)
            settings.analyzedLine = line
            perfDataLine = @time createFiles(perfData, settings)
            # createFiles stored the station list of this line at position lineIndex
            settings.commonStations =
                intersect(settings.commonStations, settings.stationLists[lineIndex])
            if lineIndex == 1
                allPerfData = perfDataLine
            else
                append!(allPerfData, perfDataLine)
            end
            println("")
        end
        if settings.mode == "black list"
            @time top100(allPerfData, settings)
        else
            @time top1(allPerfData, settings)
        end
    elseif settings.approach == "registration points"
        # deviation at each registration point
        settings.commonStations = collect(keys(settings.stationDict))
        if settings.objectInFocus == "single line"
            singleLineData = @time createFiles(perfData, settings)
            quantileA, quantileB =
                @time analyzeStatisticalVariation(singleLineData, settings)
            @time plotEverything(quantileA, quantileB, settings, memory)
        elseif settings.objectInFocus == "all lines"
            for line in settings.allLines
                settings.analyzedLine = line
                dataOfLine = @time createFiles(perfData, settings)
                quantileA, quantileB =
                    @time analyzeStatisticalVariation(dataOfLine, settings)
                @time plotEverything(quantileA, quantileB, settings, memory)
                println("")
            end
        end
    elseif settings.approach == "between registration points"
        settings.commonStations = collect(keys(settings.stationDict))
        if settings.objectInFocus == "single line"
            singleLineData = @time createFiles(perfData, settings)
            firstPlotData, secondPlotData =
                @time getDifferences(singleLineData, settings)
            @time plotEverything(firstPlotData, secondPlotData, settings, memory)
        elseif settings.objectInFocus == "all lines"
            for line in settings.allLines
                settings.analyzedLine = line
                dataOfLine = createFiles(perfData, settings)
                firstPlotData, secondPlotData =
                    @time getDifferences(dataOfLine, settings)
                @time plotEverything(firstPlotData, secondPlotData, settings, memory)
                println("")
            end
        end
    else
        error("ERROR: No approach has been selected. Please do so.")
    end
end
|
|
|
|
"""
    readPerfData(settings)

Read the real-time performance data from `settings.realTimeDataPath` (semicolon
separated, header in the first row) and return the cleaned `DataFrame`.

Steps:
1. If `settings.timePeriod[1]` is not "no", the rows are marked or filtered by
   `selectSpecificDays`.
2. The columns `QUELLE_SENDER` and `EINGANGSZEIT` are dropped and rows that are
   duplicates in the remaining columns are removed (the first occurrence is kept).
3. The station code "TS" is renamed to "TS T".

The number of rows read and the number of rows removed are printed.
"""
function readPerfData(settings)
    perfData = DataFrame(CSV.File(settings.realTimeDataPath; header = 1, delim = ";"))

    sizePerfData = size(perfData, 1)
    println("The file ", settings.realTimeDataPath, " has ", sizePerfData, " rows.")

    if settings.timePeriod[1] != "no"
        perfData = selectSpecificDays(perfData, settings)
    end

    # duplicates are being deleted
    select!(perfData, Not([:QUELLE_SENDER, :EINGANGSZEIT]))
    unique!(perfData)

    # The original loop used `==` (a comparison whose result was discarded) where an
    # assignment was intended, so "TS" was never renamed. The column is widened to
    # `String` first so the longer code also fits when CSV.jl inferred a short
    # fixed-width string type for it.
    perfData[!, :ZUGEREIGNIS_DS100] =
        replace(String.(perfData.ZUGEREIGNIS_DS100), "TS" => "TS T")

    println(
        "Performance data has been sorted and saved. ",
        sizePerfData - size(perfData, 1),
        " row(s) has/have been deleted.",
    )

    return perfData
end
|
|
|
|
"""
    selectSpecificDays(df1, settings)

Mark or filter the rows of `df1` according to `settings.timePeriod` and return the
resulting `DataFrame`. The scheduled time `ZUGEREIGNIS_SOLLZEIT` of every row is parsed
with the format "dd.mm.yyyy HH:MM".

- `timePeriod[1] == "match day"`: `settings.gamedays` (date-time strings in the same
  format) is replaced by the unique dates; the columns `day` (date of the row) and
  `game` ("yes"/"no") are added. No row is removed.
- `timePeriod[1] == "rush hour"`: `timePeriod[2:5]` are parsed as the start/end of the
  morning and the evening window; the column `rushHour` ("yes"/"no") is added and the
  result is passed to `saveOutput`. No row is removed.
- anything else: `timePeriod[1]` and `timePeriod[2]` are treated as day names; the
  column `dayname` is added and only rows falling on one of the two days are returned.

`df1` itself receives the added columns as well.
"""
function selectSpecificDays(df1, settings)
    # Build the format once and parse every timestamp exactly once.
    timestampFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")
    scheduled =
        Dates.DateTime[Dates.DateTime(t, timestampFormat) for t in df1.ZUGEREIGNIS_SOLLZEIT]

    if settings.timePeriod[1] == "match day" # days with match are being marked
        gamedays = unique!(
            Dates.Date[
                Dates.Date(Dates.DateTime(d, timestampFormat)) for d in settings.gamedays
            ],
        )
        settings.gamedays = gamedays
        gamedayLookup = Set(gamedays)

        days = Dates.Date.(scheduled)
        df1[!, :day] = days
        df1[!, :game] = String[d in gamedayLookup ? "yes" : "no" for d in days]
        df_new = copy(df1)

    elseif settings.timePeriod[1] == "rush hour" # rush hour or not
        startM, endM, startE, endE =
            (parse(Float64, settings.timePeriod[k]) for k in 2:5)

        # The clock time is encoded as hour + minute / 100 (07:30 -> 7.3).
        # NOTE(review): the window limits are presumably written the same way in the
        # settings file -- confirm.
        function rushHourLabel(ts)
            current = Dates.hour(ts) + Dates.minute(ts) / 100
            inMorning = startM <= current <= endM
            inEvening = startE <= current <= endE
            return (inMorning || inEvening) ? "yes" : "no"
        end

        df1[!, :rushHour] = String[rushHourLabel(ts) for ts in scheduled]
        df_new = copy(df1)
        saveOutput(df_new, settings)

    else # comparison of two weekdays
        function weekdayLabel(ts)
            name = Dates.dayname(ts)
            isSelected =
                name == settings.timePeriod[1] || name == settings.timePeriod[2]
            return isSelected ? name : "day undefined"
        end

        labels = String[weekdayLabel(ts) for ts in scheduled]
        df1[!, :dayname] = labels
        df_new = df1[labels .!= "day undefined", :]
    end

    return df_new
end
|
|
|
|
"""
    readSettings(filePathSettings)

Load the YAML file `filePathSettings` and return a filled `Settings` object.

The top-level entry `settings` of the file has to contain the keywords
`outputFilePath`, `objectInFocus`, `timePeriod`, `analyzedLine`, `estimatedTimesPath`,
`realTimeDataPath`, `stationsListPath`, `mode`, `allLines`, `quantile` and `approach`.
Each value is stored in the field of the same name; a missing keyword raises an error
that names it.

In addition a dictionary from station code (DS100) to station name is built from the
file at `stationsListPath` with `createStationDict`, extended by the entries for "TFL"
and "TBO", and stored in the field `stationDict`.
"""
function readSettings(filePathSettings)
    # The do-block closes the file handle again; the original left it open.
    data = open(filePathSettings) do io
        YAML.load(io)
    end
    entries = data["settings"]
    setting = Settings()

    requiredKeys = (
        "outputFilePath",
        "objectInFocus",
        "timePeriod",
        "analyzedLine",
        "estimatedTimesPath",
        "realTimeDataPath",
        "stationsListPath",
        "mode",
        "allLines",
        "quantile",
        "approach",
    )
    for key in requiredKeys
        haskey(entries, key) || error(
            "ERROR at reading the settings yaml file: The keyword $key is missing. " *
            "It has to be added.",
        )
        # same as `setting.<key> = entries[key]`
        setproperty!(setting, Symbol(key), entries[key])
    end

    # station dict for DS100 => name of station
    stationDict = createStationDict(readlines(setting.stationsListPath))
    stationDict["TFL"] = "Stuttgart Flughafen Messe"
    stationDict["TBO"] = "Boeblingen"
    setting.stationDict = stationDict

    return setting
end
|
|
|
|
"""
    createStationDict(stationDict)

Build a dictionary from the lines of the station list.

`stationDict` is an iterable of text lines whose fields are separated by ";". For every
line the second field becomes the key and the third field the value; further fields
are ignored. Empty or whitespace-only lines are skipped. A non-blank line with fewer
than three fields raises a `BoundsError`.
"""
function createStationDict(stationDict)
    dic = Dict()
    for line in stationDict
        # a blank line (e.g. at the end of the file) carries no station
        isempty(strip(line)) && continue
        fields = split(line, ";")
        dic[String(fields[2])] = String(fields[3])
    end
    return dic
end
|
|
|
|
"""
    createFiles(perfData, settings)

Prepare the performance data for the line `settings.analyzedLine`.

`readLineData` checks the estimated times of the line, stores its station sequence in
`settings.stationLists` and returns the train numbers of the line. `editFile` then
reduces `perfData` to these train numbers and assigns a direction to each of them.
The edited performance data is returned.
"""
function createFiles(perfData, settings)
    return editFile(settings, perfData, readLineData(settings))
end
|
|
|
|
"""
    readLineData(settings)

Read the estimated times from `settings.estimatedTimesPath` and return the unique train
numbers (`ZUGEREIGNIS_ZUGNUMMER`) of the line `settings.analyzedLine`.

As a side effect the station sequence (`ZUGEREIGNIS_DS100`) of one run of the line is
appended to `settings.stationLists`, and the names of its first and last station are
printed. The run is read from the sorted rows, starting at a row with
`SERVICE_HALTNR == 1` and ending at the next row that carries the largest
`SERVICE_HALTNR` of the line.
"""
function readLineData(settings)
    estimatedTimes = DataFrame(CSV.File(settings.estimatedTimesPath))
    lineNumber = parse(Int, settings.analyzedLine)
    lineData = estimatedTimes[estimatedTimes[:, :ZUGEREIGNIS_LINIE] .== lineNumber, :]
    trainNumber = unique(lineData.ZUGEREIGNIS_ZUGNUMMER)

    # sort the data in train sets
    sort!(lineData, [:SERVICE_ID, :SERVICE_START_ZEIT], rev = (false, true))

    # row count for a better organisation (not used further in this function)
    lineData[!, :rownumber] = axes(lineData, 1)

    haltNumbers = lineData.SERVICE_HALTNR
    stationCodes = lineData.ZUGEREIGNIS_DS100

    maxHALT_NR = maximum(haltNumbers)
    newTrains = findall(==(1), haltNumbers)

    # rows with the highest stop number, ignoring those before the first train start
    endOfMaxStopsTrains =
        filter!(>=(newTrains[1]), findall(==(maxHALT_NR), haltNumbers))

    # Pick the train start lying exactly maxHALT_NR - 1 rows before the first of these
    # rows; when there is no such start, the last train start is used instead.
    expectedStart = endOfMaxStopsTrains[1] - maxHALT_NR + 1
    startPosition = findfirst(==(expectedStart), newTrains)
    if startPosition === nothing
        startPosition = length(newTrains)
    end
    startRow = newTrains[startPosition]

    # the station list runs from the start row to the next row with the highest stop number
    stopRow = startRow
    while haltNumbers[stopRow] != maxHALT_NR
        stopRow += 1
    end
    stationsList = Any[stationCodes[k] for k in startRow:stopRow]

    # saving the stationList in settings
    push!(settings.stationLists, stationsList)

    stationsListOneWay = unique(stationsList)
    stationsListOtherWay = reverse(stationsList) # not used further in this function

    println(
        "Line ",
        settings.analyzedLine,
        " is connecting ",
        settings.stationDict[first(stationsListOneWay)],
        " and ",
        settings.stationDict[last(stationsListOneWay)],
    )

    return trainNumber
end
|
|
|
|
"""
    editFile(settings, perfData, trainNumber)

Return the rows of `perfData` whose `ZUGEREIGNIS_ZUGNUMMER` is contained in
`trainNumber`, extended by the columns `rownumber`, `ZUGEREIGNIS_RICHTUNG` and
`ZUGEREIGNIS_LINIE` and sorted by `SERVICE_ID` (descending) and `ZUGEREIGNIS_SOLLZEIT`
(ascending). The `perfData` passed in is not modified.

The direction is derived from the first row with `ZUGEREIGNIS_TYP == 10` at the first
station of the line's station list: trains sharing the parity of that row's train
number get the last station of the list as direction, all other trains the first
station. When there is no such row, every direction stays the empty string.

The station list is `settings.stationLists[1]` when `settings.objectInFocus` is
"single line", otherwise the list at the position of `settings.analyzedLine` in
`settings.allLines`.
"""
function editFile(settings, perfData, trainNumber)
    perfData = filter(:ZUGEREIGNIS_ZUGNUMMER => in(trainNumber), perfData)

    lineNr = if settings.objectInFocus == "single line"
        1
    else
        findall(==(settings.analyzedLine), settings.allLines)[1]
    end
    stationList = settings.stationLists[lineNr]

    directionE = "" # direction of trains with even train numbers
    directionU = "" # direction of trains with uneven train numbers

    perfData[!, :rownumber] = axes(perfData, 1)

    # the first matching event at the first station decides which parity runs which way
    events = zip(
        perfData.ZUGEREIGNIS_TYP,
        perfData.ZUGEREIGNIS_DS100,
        perfData.ZUGEREIGNIS_ZUGNUMMER,
    )
    for (eventType, station, number) in events
        (eventType == 10 && station == stationList[1]) || continue
        if iseven(number)
            directionE, directionU = stationList[end], stationList[1]
        else
            directionU, directionE = stationList[end], stationList[1]
        end
        break
    end

    perfData[!, :ZUGEREIGNIS_RICHTUNG] = Any[
        iseven(number) ? directionE : directionU for
        number in perfData.ZUGEREIGNIS_ZUGNUMMER
    ]

    # NOTE(review): ZUGEREIGNIS_SOLLZEIT is parsed elsewhere in this file as a
    # "dd.mm.yyyy HH:MM" string; if it is still a string here, this ordering is
    # lexicographic rather than chronological -- confirm that this is intended.
    sort!(perfData, [:SERVICE_ID, :ZUGEREIGNIS_SOLLZEIT], rev = (true, false))

    perfData[!, :ZUGEREIGNIS_LINIE] = fill(settings.analyzedLine, nrow(perfData))

    println("Performance Data for line " * settings.analyzedLine * " has been modified.")

    return perfData
end
|
|
|
|
|
|
|
|
end
|