
Bachelor Thesis Commit

master
Ana Ueberhorst 2021-09-16 15:22:28 +02:00
parent 385fe4eb43
commit 0dea4585ae
9 changed files with 2406 additions and 0 deletions


@ -0,0 +1,34 @@
To run the program, the packages it uses first have to be added to the Julia environment.
These packages are (a short sketch of how to add them follows the list):
- https://github.com/JuliaData/YAML.jl
- https://github.com/quinnj/Dates.jl
- https://github.com/JuliaData/DataFrames.jl
- https://github.com/JuliaData/CSV.jl
- https://github.com/JuliaPlots/Plots.jl
- https://github.com/JuliaLang/Statistics.jl
- https://github.com/JuliaStats/StatsBase.jl
- https://github.com/JuliaPlots/StatsPlots.jl
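They can be added, for example, via Julia's package manager. The following is a minimal sketch
(assuming a recent Julia 1.x installation; Dates and Statistics ship with Julia as standard
libraries and only need to be loaded, not installed):

    import Pkg
    # registered packages used by the program
    Pkg.add(["YAML", "DataFrames", "CSV", "Plots", "StatsBase", "StatsPlots"])
    # note: plotting.jl selects the pyplot() backend, which may additionally require
    # the PyPlot package: Pkg.add("PyPlot")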
The program is started via the file readFile.jl.
However, the path to the enclosed file settings.yaml set in readFile.jl has to be adjusted first.
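One possible way to start it, assuming the working directory contains the source files:

    julia readFile.jl

or, from the Julia REPL:

    julia> include("readFile.jl")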
All settings are made in the file settings.yaml, and the location for all output files can be adjusted there.
The locations of the following files from the Deutsche Bahn data set also have to be adjusted in settings.yaml:
- Bahnhofsdaten.csv
- 20170901-20171019_Alle_Sollereignisse_S-Bahn_Stuttgart.csv
- 20170901-20171019_Alle_Istmeldungen_S-Bahn_Stuttgart.csv
Settings for plotting can be made in the file plotting.jl.
The "@time" in front of some function calls only serves to measure runtime; it can be removed without affecting the results.
If the following error message appears,
LoadError: MethodError: no method matching _show(::IOStream,
::MIME{Symbol("application/pdf")}
simply restart the program.


@ -0,0 +1,240 @@
# approach 2: deviation is analyzed for time between the registration points
# time between two stations and time between arrival and departure
module betweenRegistrationPoints
include("./output.jl")
using Statistics, CSV, Dates, DataFrames
using .output
export getDifferences
function getDifferences(modPerfData, settings)
df1, perfDataDirection1, df2, perfDataDirection2 =
prepareData(modPerfData, settings)
plotData1 = calculateDeviation(df1, perfDataDirection1, settings)
plotData2 = calculateDeviation(df2, perfDataDirection2, settings)
return plotData1, plotData2
end
"""
Function is preparing the data if necessary and calling another function
to prepare DataFrames to save the results of the following analyses.
"""
function prepareData(modPerfData, settings)
# determine current line and its station list
if settings.objectInFocus == "single line"
lineNr = 1
else
lineNr = findall(x -> x == settings.analyzedLine, settings.allLines)
lineNr = lineNr[1]
end
stationList1 = settings.stationLists[lineNr]
stationList2 = reverse(stationList1)
df1 = createDataFrame(stationList1, settings)
df2 = createDataFrame(stationList2, settings)
select!(modPerfData, Not(:rownumber))
modPerfData =
filter(row -> row[:ZUGEREIGNIS_DS100] in stationList1, modPerfData)
modPerfData[!, :rownumber] = axes(modPerfData, 1)
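# working copy of the filtered data; rows are swapped in df below when the TSS order is wrong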
df = modPerfData[modPerfData[:, :rownumber].!=0, :]
# the station data of TSS has to be changed (sometimes in the wrong order)
if stationList1[1] == "TSS" || stationList2[1] == "TSS"
for row in eachrow(modPerfData)
if row.rownumber > 1 && row.ZUGEREIGNIS_DS100 == "TSS"
if modPerfData.ZUGEREIGNIS_DS100[row.rownumber-1] == "TSS" && (
modPerfData.ZUGEREIGNIS_TYP[row.rownumber-1] == 10 ||
modPerfData.ZUGEREIGNIS_TYP[row.rownumber-1] == 50
)
row1 = modPerfData[row.rownumber-1, :]
row2 = modPerfData[row.rownumber, :]
df[row.rownumber, :] = row1
df[row.rownumber-1, :] = row2
end
end
end
end
perfDataDirection1 = df[df[:, :ZUGEREIGNIS_RICHTUNG].==stationList2[1], :]
perfDataDirection2 = df[df[:, :ZUGEREIGNIS_RICHTUNG].==stationList1[1], :]
return df1, perfDataDirection1, df2, perfDataDirection2
end
function createDataFrame(stationList, settings)
df = DataFrame()
point1 = Any[]
point2 = Any[]
counter = 0
if settings.objectInFocus != "Stuttgarter Stammstrecke"
if stationList[1] == "TSS"
pushfirst!(stationList, stationList[1])
counter = convert(Int, (length(stationList) - 1) / 2)
sequence = repeat(["station", "section"], counter)
elseif stationList[size(stationList, 1)] == "TSS"
push!(stationList, stationList[size(stationList, 1)])
counter = convert(Int, (length(stationList) - 1) / 2)
sequence = repeat(["section", "station"], counter)
else
counter = convert(Int, (length(stationList) - 1) / 2 + 0.5)
sequence = repeat(["section", "station"], counter)
pop!(sequence)
end
else
push!(stationList, stationList[length(stationList)])
pushfirst!(stationList, stationList[1])
counter = convert(Int, (length(stationList) - 1) / 2 + 0.5)
sequence = repeat(["station", "section"], counter)
pop!(sequence)
end
#create station list for differences
for i = 2:length(stationList)
push!(point1, stationList[i-1])
push!(point2, stationList[i])
end
df[!, :point1] = point1
df[!, :point2] = point2
df[!, :sequence] = sequence
return df
end
"""
Function is calculating the deviation for each section and stop. Selected
quantiles are being created.
"""
function calculateDeviation(df1, perfData, settings)
deviationArray = Any[]
perfData[!, :row] = axes(perfData, 1)
for row in eachrow(df1)
deviationSequence = Any[]
for rowData in eachrow(perfData)
if rowData.row != 1 &&
perfData.ZUGEREIGNIS_DS100[rowData.row-1] == row.point1 &&
rowData.ZUGEREIGNIS_DS100 == row.point2
if (
rowData.ZUGEREIGNIS_TYP == 10 ||
rowData.ZUGEREIGNIS_TYP == 40
) && (
perfData.ZUGEREIGNIS_TYP[rowData.row-1] == 20 ||
perfData.ZUGEREIGNIS_TYP[rowData.row-1] == 50
)
actual = Second(
convert(
Dates.Second,
Dates.DateTime(
perfData.ZUGEREIGNIS_ISTZEIT[rowData.row],
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
perfData.ZUGEREIGNIS_ISTZEIT[rowData.row-1],
"dd.mm.yyyy HH:MM",
),
),
)
estimated = Second(
convert(
Dates.Second,
Dates.DateTime(
perfData.ZUGEREIGNIS_SOLLZEIT[rowData.row],
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
perfData.ZUGEREIGNIS_SOLLZEIT[rowData.row-1],
"dd.mm.yyyy HH:MM",
),
),
)
deviation = Dates.value(actual - estimated)
push!(deviationSequence, deviation)
elseif (
rowData.ZUGEREIGNIS_TYP == 20 ||
rowData.ZUGEREIGNIS_TYP == 50
) && (
perfData.ZUGEREIGNIS_TYP[rowData.row-1] == 10 ||
perfData.ZUGEREIGNIS_TYP[rowData.row-1] == 40
)
actual = Second(
convert(
Dates.Second,
Dates.DateTime(
perfData.ZUGEREIGNIS_ISTZEIT[rowData.row],
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
perfData.ZUGEREIGNIS_ISTZEIT[rowData.row-1],
"dd.mm.yyyy HH:MM",
),
),
)
estimated = Second(
convert(
Dates.Second,
Dates.DateTime(
perfData.ZUGEREIGNIS_SOLLZEIT[rowData.row],
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
perfData.ZUGEREIGNIS_SOLLZEIT[rowData.row-1],
"dd.mm.yyyy HH:MM",
),
),
)
deviation = Dates.value(actual - estimated)
push!(deviationSequence, deviation)
end
end
end
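# no matching event pair was found for this section: insert a placeholder value and mark the row
# with "noData"; such rows are removed again after the quantiles have been computed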
if length(deviationSequence) == 0
deviationSequence = [10]
row.sequence = "noData"
end
push!(deviationArray, deviationSequence)
end
quantile = ""
quantile = settings.quantile[1]
quantArray = Any[]
average = Any[]
medians = Float64[]
for row in deviationArray
x = quantile!(row, parse(Float64, quantile) / 100)
y = mean(row)
z = median(row)
push!(quantArray, x)
push!(average, y)
push!(medians, z)
end
nameColumn = "deviation_" * quantile
df1[!, nameColumn] = quantArray
df1[!, :averageDeviation] = average
df1[!, :median] = medians
points = String[]
for row in eachrow(df1)
if row.point1 != row.point2
push!(points, row.point1 * "-" * row.point2)
else
push!(points, row.point1)
end
end
select!(df1, Not(:point1))
df1[!, :points] = points
df1 = df1[df1[:, :sequence].!="noData", :]
return df1
end
end

checkPerformance.jl Normal file

@ -0,0 +1,498 @@
module performance
# main module
# functions are being called
# data is being sorted and organized
include("./types.jl")
include("./output.jl")
include("./plotting.jl")
include("./betweenRegistrationPoints.jl")
include("./registrationPoints.jl")
import YAML
using CSV, Dates, DataFrames, Statistics, StatsBase
using .types
using .output
using .plotting
using .betweenRegistrationPoints
using .registrationPoints
export startAnalyzing
function startAnalyzing(filePathSettings)
settings = readSettings(filePathSettings)
memory = Memory()
# if needed the match days are collected
if settings.timePeriod[1] == "match day"
df = DataFrame(CSV.File(settings.estimatedTimesPath))
df_station = df[df[:, :ZUGEREIGNIS_LINIE].==11, :]
settings.gamedays = df_station.SERVICE_START_ZEIT
unique!(settings.gamedays)
end
# performance data is being sorted
perfData = @time readPerfData(settings)
"""
The following conditional evaluation calls the different functions.
Possible modes: "statistical variation", "black list", "train number".
For "statistical variation" two different approaches are being used.
For "black list" or a single "train number" all selected line numbers are
analyzed to get detailed information about the deviation for each
registration point in one DataFrame. The function "top100" creates "black
lists" for each week; the function "top1" analyzes a single train number
for the average deviation and the median.
"""
if settings.mode != "statistical variation"
#for "black list or single train number
allPerfData = DataFrame()
currentLine = 1
for line in settings.allLines
settings.analyzedLine = line
perfDataLine = @time createFiles(perfData, settings)
settings.commonStations = intersect(
settings.commonStations,
settings.stationLists[currentLine],
)
if currentLine == 1
allPerfData = perfDataLine
else
append!(allPerfData, perfDataLine)
end
currentLine += 1
println("")
end
if settings.mode == "black list"
@time top100(allPerfData, settings)
else
@time top1(allPerfData, settings)
end
else # settings.mode == "statistical variation"
if settings.approach == "registration points"
# deviation at each registration point
settings.commonStations = collect(keys(settings.stationDict))
if settings.objectInFocus == "single line"
allPerfData = @time createFiles(perfData, settings)
quantileD1, quantileD2 =
@time analyzeStatisticalVariation(allPerfData, settings)
@time plotEverything(quantileD1, quantileD2, settings, memory)
elseif settings.objectInFocus == "all lines"
for line in settings.allLines
settings.analyzedLine = line
linePerfData = @time createFiles(perfData, settings)
q1, q2 = @time analyzeStatisticalVariation(
linePerfData,
settings,
)
@time plotEverything(q1, q2, settings, memory)
println("")
end
end
elseif settings.approach == "between registration points"
settings.commonStations = collect(keys(settings.stationDict))
if settings.objectInFocus == "single line"
allPerfData = @time createFiles(perfData, settings)
plotData1, plotData2 =
@time getDifferences(allPerfData, settings)
@time plotEverything(plotData1, plotData2, settings, memory)
elseif settings.objectInFocus == "all lines"
for line in settings.allLines
settings.analyzedLine = line
allPerfData = createFiles(perfData, settings)
plotData1, plotData2 =
@time getDifferences(allPerfData, settings)
@time plotEverything(plotData1, plotData2, settings, memory)
println("")
end
end
else
error("ERROR: No approach has been selected. Please do so.")
end
end
end
"""
The function is sorting the performance data and deleting duplicates. If only
specific days are needed, other days will be deleted or marked.
"""
function readPerfData(settings)
perfData =
DataFrame(CSV.File(settings.realTimeDataPath; header = 1, delim = ";"))
sizePerfData = size(perfData, 1)
println(
"The file ",
settings.realTimeDataPath,
" has ",
sizePerfData,
" rows.",
)
if settings.timePeriod[1] != "no"
perfData = selectSpecificDays(perfData, settings)
end
# duplicates are being deleted
select!(perfData, Not(:QUELLE_SENDER))
select!(perfData, Not(:EINGANGSZEIT))
perfData[!, :single] = ((nonunique(perfData)))
perfData = perfData[perfData[:, :single].==false, :]
select!(perfData, Not(:single))
for row in eachrow(perfData)
if row.ZUGEREIGNIS_DS100 == "TS"
row.ZUGEREIGNIS_DS100 = "TS T"
end
end
println(
"Performance data has been sorted and saved. ",
sizePerfData - size(perfData, 1),
" row(s) has/have been deleted.",
)
return perfData
end
function selectSpecificDays(df1, settings)
if settings.timePeriod[1] == "match day" # days with match are being marked
gamedays = Any[]
day = Any[]
game = Any[]
for day in settings.gamedays
push!(gamedays, Dates.Date(Dates.DateTime(day, "dd.mm.yyyy HH:MM")))
unique!(gamedays)
end
settings.gamedays = copy(gamedays)
for row in eachrow(df1)
currentDay = Dates.Date(
Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
)
push!(day, currentDay)
if currentDay in settings.gamedays
push!(game, "yes")
else
push!(game, "no")
end
end
df1[!, :day] = day
df1[!, :game] = game
df_new = copy(df1)
#df_day = filter(row -> row[:day] in settings.gamedays, df1)
elseif settings.timePeriod[1] == "rush hour" # rush hour or not
rushHour = Any[]
startM = parse(Float64, settings.timePeriod[2])
endM = parse(Float64, settings.timePeriod[3])
startE = parse(Float64, settings.timePeriod[4])
endE = parse(Float64, settings.timePeriod[5])
for row in eachrow(df1)
currentH = Dates.Hour(
Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
)
currentM = Dates.Minute(
Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
)
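# encode the scheduled time as HH + MM/100 (e.g. 16:30 becomes 16.3) so that it can be
# compared with the rush hour boundaries configured in settings.timePeriod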
current = real(currentH.value) + real(currentM.value) / 100
if (current >= startM && current <= endM) ||
(current >= startE && current <= endE)
push!(rushHour, "yes")
else
push!(rushHour, "no")
end
end
df1[!, :rushHour] = rushHour
df_new = copy(df1)
saveOutput(df_new, settings)
else # comparison of two weekdays
df1[!, :dayname] = fill("day undefined", size(df1, 1))
for row in eachrow(df1)
if Dates.dayname(
Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
) == settings.timePeriod[1]
row.dayname = settings.timePeriod[1] # day 1
elseif Dates.dayname(
Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM"),
) == settings.timePeriod[2]
row.dayname = settings.timePeriod[2] # day 2
end
end
df_new = df1[df1[:, :dayname].!="day undefined", :]
end
return df_new
end
"""
Settings are being saved and a dictionary for the station names is being created.
"""
function readSettings(filePathSettings)
data = YAML.load(open(filePathSettings))
setting = Settings()
if haskey(data["settings"], "outputFilePath")
setting.outputFilePath = data["settings"]["outputFilePath"]
delete!(data["settings"], "outputFilePath")
else
error(
"ERROR at reading the settings yaml file: The keyword outputFilePath
is missing. It has to be added.",
)
end
if haskey(data["settings"], "objectInFocus")
setting.objectInFocus = data["settings"]["objectInFocus"]
delete!(data["settings"], "objectInFocus")
else
error(
"ERROR at reading the settings yaml file: The keyword objectInFocus
is missing. It has to be added.",
)
end
if haskey(data["settings"], "timePeriod")
setting.timePeriod = data["settings"]["timePeriod"]
delete!(data["settings"], "timePeriod")
else
error(
"ERROR at reading the settings yaml file: The keyword timePeriod is
missing. It has to be added.",
)
end
if haskey(data["settings"], "analyzedLine")
setting.analyzedLine = data["settings"]["analyzedLine"]
delete!(data["settings"], "analyzedLine")
else
error(
"ERROR at reading the settings yaml file: The keyword analyzedLine
is missing. It has to be added.",
)
end
if haskey(data["settings"], "estimatedTimesPath")
setting.estimatedTimesPath = data["settings"]["estimatedTimesPath"]
delete!(data["settings"], "estimatedTimesPath")
else
error(
"ERROR at reading the settings yaml file: The keyword
estimatedTimesPath is missing. It has to be added.",
)
end
if haskey(data["settings"], "realTimeDataPath")
setting.realTimeDataPath = data["settings"]["realTimeDataPath"]
delete!(data["settings"], "realTimeDataPath")
else
error(
"ERROR at reading the settings yaml file: The keyword realTimeData
is missing. It has to be added.",
)
end
if haskey(data["settings"], "stationsListPath")
setting.stationsListPath = data["settings"]["stationsListPath"]
delete!(data["settings"], "stationsListPath")
else
error(
"ERROR at reading the settings yaml file: The keyword
stationsListPath is missing. It has to be added.",
)
end
if haskey(data["settings"], "mode")
setting.mode = data["settings"]["mode"]
delete!(data["settings"], "mode")
else
error(
"ERROR at reading the settings yaml file: The keyword mode is
missing. It has to be added.",
)
end
if haskey(data["settings"], "allLines")
setting.allLines = data["settings"]["allLines"]
delete!(data["settings"], "allLines")
else
error(
"ERROR at reading the settings yaml file: The keyword allLines is
missing. It has to be added.",
)
end
if haskey(data["settings"], "quantile")
setting.quantile = data["settings"]["quantile"]
delete!(data["settings"], "quantile")
else
error(
"ERROR at reading the settings yaml file: The keyword quantile is
missing. It has to be added.",
)
end
if haskey(data["settings"], "approach")
setting.approach = data["settings"]["approach"]
delete!(data["settings"], "approach")
else
error(
"ERROR at reading the settings yaml file: The keyword singleQuantile
is missing. It has to be added.",
)
end
# station dict for DS100 => name of station
stationDict = createStationDict(readlines(open(setting.stationsListPath)))
stationDict["TFL"] = "Stuttgart Flughafen Messe"
stationDict["TBO"] = "Boeblingen"
setting.stationDict = stationDict
return setting
end
function createStationDict(stationDict)
dic = Dict()
for x in stationDict
substring = (split(x, ";"))
push!(dic, substring[2] => substring[3])
end
return dic
end
"""
For the selected line number the estimated times are being checked. The station
sequence is being read and a direction is assigned to each train number.
"""
function createFiles(perfData, settings)
trainNumber = readLineData(settings)
perfData = editFile(settings, perfData, trainNumber)
return perfData
end
function readLineData(settings)
df = DataFrame(CSV.File(settings.estimatedTimesPath))
df1 = df[df[:, :ZUGEREIGNIS_LINIE].==parse(Int, settings.analyzedLine), :]
trainNumber = unique(df1.ZUGEREIGNIS_ZUGNUMMER)
# sort the data in train sets
df1 = sort!(df1, [:SERVICE_ID, :SERVICE_START_ZEIT], rev = (false, true))
#row count for a better organisation
df1[!, :rownumber] = axes(df1, 1)
maxHALT_NR = maximum(df1.SERVICE_HALTNR)
newTrains = findall(x -> x == 1, df1.SERVICE_HALTNR)
endOfMaxStopsTrains = findall(x -> x == maxHALT_NR, df1.SERVICE_HALTNR)
endOfMaxStopsTrains = filter!(x -> x >= newTrains[1], endOfMaxStopsTrains)
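# find the first train run whose stops 1 to maxHALT_NR appear as one contiguous block of rows;
# its rows yield the station sequence of the analyzed line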
i = 0
for x in newTrains
if x == endOfMaxStopsTrains[1] - maxHALT_NR + 1
i += 1
break
else
i += 1
end
end
# station lists for both directions are being created
i = newTrains[i]
stationsList = Any[]
while df1.SERVICE_HALTNR[i] != maxHALT_NR
push!(stationsList, df1.ZUGEREIGNIS_DS100[i])
i += 1
end
push!(stationsList, df1.ZUGEREIGNIS_DS100[i])
#saving the stationList in settings
push!(settings.stationLists, stationsList)
stationsListOneWay = unique(stationsList)
stationsListOtherWay = reverse(stationsList)
println(
"Line ",
settings.analyzedLine,
" is connecting ",
settings.stationDict[stationsListOneWay[1]],
" and ",
settings.stationDict[stationsListOneWay[size(stationsListOneWay, 1)]],
)
return trainNumber
end
function editFile(settings, perfData, trainNumber)
perfData =
filter(row -> row[:ZUGEREIGNIS_ZUGNUMMER] in trainNumber, perfData)
if settings.objectInFocus == "single line"
lineNr = 1
else
lineNr = findall(x -> x == settings.analyzedLine, settings.allLines)
lineNr = lineNr[1]
end
stationList = settings.stationLists[lineNr]
directionE = "" # direction of trains with even train numbers
directionU = "" # direction of trains with uneven train numbers
direction = Any[]
perfData[!, :rownumber] = axes(perfData, 1)
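# infer the running direction from the first departure (type 10) at the first station of the
# line: the parity of that train number decides where even and uneven train numbers are heading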
for row in eachrow(perfData)
if row.ZUGEREIGNIS_TYP == 10 && row.ZUGEREIGNIS_DS100 == stationList[1]
if iseven(row.ZUGEREIGNIS_ZUGNUMMER)
directionE = stationList[length(stationList)]
directionU = stationList[1]
else
directionU = stationList[length(stationList)]
directionE = stationList[1]
end
break
end
end
for row in eachrow(perfData)
if iseven(row.ZUGEREIGNIS_ZUGNUMMER)
push!(direction, directionE)
else
push!(direction, directionU)
end
end
perfData[!, :ZUGEREIGNIS_RICHTUNG] = direction
perfData = sort!(
perfData,
[:SERVICE_ID, :ZUGEREIGNIS_SOLLZEIT],
rev = (true, false),
)
perfData[!, :ZUGEREIGNIS_LINIE] =
fill(settings.analyzedLine, size(perfData, 1))
println(
"Performance Data for line " *
settings.analyzedLine *
" has been modified.",
)
return perfData
end
end

output.jl Normal file

@ -0,0 +1,45 @@
module output
using DelimitedFiles, CSV, Dates, DataFrames
export saveOutput, saveDataFrame
function saveOutput(perfData, settings)
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
if settings.mode == "black list"
path = settings.outputFilePath * dateString * "_Top 100.csv"
CSV.write(path, perfData, header = true)
elseif settings.mode == "statistical variation"
# for settings.objectInFocus == "single line" ||
# settings.objectInFocus == "all lines"
path =
settings.outputFilePath *
dateString *
"_" *
settings.mode *
"_" *
settings.objectInFocus *
".csv"
CSV.write(path, perfData, header = true)
else
println("ATTENTION: No output has been created.")
end
end
"""
Function can be called from any module to save a DataFrame.
"""
function saveDataFrame(perfData, settings, x)
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
path =
settings.outputFilePath *
"_Linie_" *
settings.analyzedLine *
dateString * x * ".csv"
CSV.write(path, perfData, header = true)
end
end

plotting.jl Normal file

@ -0,0 +1,592 @@
module plotting
using StatsPlots, Plots, DataFrames, Statistics, Dates
export plotEverything, plotBars, plotAllDistributions
function plotEverything(df1, df2, settings, memory)
println("Data is being plotted.")
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
memory.focus = ""
memory.yTicks = (-240:60:2000) # adjust per use case
memory.xRotation = 90.0
memory.xLabel = "stations"
memory.yLabel = ""
memory.color = [:chartreuse :yellow2 :lightskyblue :purple]
memory.linewidth = 0
memory.barwidth = 0.7
memory.tickSize = 13
memory.guidefontsize = 20
memory.legendSize = 13
memory.titleSize = 23
memory.legendPosition = :outerright
if settings.approach == "registration points"
memory.yLabel = "deviation in seconds"
memory.focus = "Line " * settings.analyzedLine
memory.size = (2000, 1300)
memory.direction1 =
settings.stationDict[df1.station[length(df1.station)]]
memory.direction2 =
settings.stationDict[df2.station[length(df2.station)]]
if settings.timePeriod[1] != "no" &&
settings.timePeriod[1] != "match day" &&
settings.timePeriod[1] != "rush hour" &&
settings.analyzedLine != "11"
memory.title =
memory.focus *
" - Direction " *
memory.direction1 *
" - " *
settings.timePeriod[1] *
"/ " *
settings.timePeriod[2] *
" - " *
settings.quantile[1] *
" %-Quantile"
p1 = plotBarsDays(df1, settings, memory)
memory.title =
memory.focus *
" - Direction " *
memory.direction2 *
" - " *
settings.timePeriod[1] *
"/ " *
settings.timePeriod[2] *
" - " *
settings.quantile[1] *
" %-Quantile"
p2 = plotBarsDays(df2, settings, memory)
elseif settings.timePeriod[1] == "match day" &&
settings.analyzedLine != "11"
memory.size = (900, 750)
memory.title =
memory.focus *
" - Direction " *
memory.direction1 *
" - Match Day - " *
settings.quantile[1] *
" %-Quantile"
p1 = plotBarsGameOrRushHour(df1, settings, memory)
memory.title =
memory.focus *
" - Direction " *
memory.direction2 *
" - Match Day - " *
settings.quantile[1] *
" %-Quantile"
p2 = plotBarsGameOrRushHour(df2, settings, memory)
elseif settings.timePeriod[1] == "rush hour" &&
settings.analyzedLine != "11"
memory.title =
memory.focus *
" - Direction " *
memory.direction1 *
" - Rush Hour - " *
settings.quantile[1] *
" %-Quantile"
p1 = plotBarsGameOrRushHour(df1, settings, memory)
memory.title =
memory.focus *
" - Direction " *
memory.direction2 *
" - Rush Hour - " *
settings.quantile[1] *
" % Quantile"
p2 = plotBarsGameOrRushHour(df2, settings, memory)
elseif settings.timePeriod[1] == "no" #no extra settings
memory.title = memory.focus * " - Direction " * memory.direction1
p1 = plotBarsMultQuant(df1, settings, memory)
memory.title = memory.focus * " - Direction " * memory.direction2
p2 = plotBarsMultQuant(df2, settings, memory)
memory.title = memory.focus * " - Distribution"
#plotDistributionInSec(df1, df2, settings, memory)
plotDistributionInMin(df1, df2, settings, memory)
if length(settings.allLines) == length(memory.distributionMin)
#plotAllDistributions(settings, memory)
#memory.title = "Distribution S-Bahn Stuttgart - 'new' quantiles"
memory.title = "Distribution S-Bahn Stuttgart - 'danish' quantiles"
p3 = plotAllDistributions(settings, memory)
settings.allLines = ["1", "2", "3", "4", "5", "6", "60"]
pop!(memory.distributionMin)
memory.title = ""
p4 = plotAllDistributions(settings, memory)
all = plot(p3, p4, layout = (2, 1), legend = :bottomright)
savefig(
all,
settings.outputFilePath *
"\\Plots\\all_Lines" *
"_" *
dateString *
".pdf",
)
end
end
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
all = plot(p1, p2, layout = (2, 1), legend = memory.legendPosition)
savefig(
all,
settings.outputFilePath *
"\\Plots\\" *
memory.focus *
" " *
settings.approach *
"_" *
dateString *
".pdf",
)
elseif settings.approach == "between registration points"
myTitle1 = ""
myTitle2 = ""
memory.yLabel = "deviation - median (seconds)"
memory.size = (900, 750)
memory.focus = "Line " * settings.analyzedLine
memory.direction1 = settings.stationDict[df1.point2[length(df1.point2)]]
memory.direction2 = settings.stationDict[df2.point2[length(df2.point2)]]
memory.title = memory.focus * " - Direction " * memory.direction1
p1 = plotBarsLineSection(df1, settings, memory)
memory.title = memory.focus * " - Direction " * memory.direction2
p2 = plotBarsLineSection(df2, settings, memory)
all = plot(p1, p2, layout = (2, 1), legend = false, size = (800, 600))
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
savefig(
all,
settings.outputFilePath *
"\\Plots\\" *
memory.focus *
" " *
settings.approach *
"_" *
dateString *
".pdf",
)
end
end
function plotAllDistributions(settings, memory)
Plots.pyplot()
y = Any[]
x = Any[]
for quantile in settings.quantile
push!(y, parse(Int, quantile))
end
x = memory.distributionMin[1]
#average september and october; source: DB
a = [3, 6]
b = [84.9, 95.8]
tickX = (0:1:20)
z = plot(
a,
b,
label = "average deviation sept/ oct 2017",
xlabel = "Deviation (min)",
ylabel = "Quantile (%)",
marker = true,
legend = :bottomright,
color = :red,
xticks = tickX,
size = (840, 600),
)
colors = [
:gray71,
:slateblue,
:goldenrod,
:darkcyan,
:magenta4,
:aqua,
:deeppink,
:tan4,
]
z = plot!(
x,
y,
title = memory.title,
label = "line " * settings.allLines[1],
marker = true,
color = colors[1],
)
for i = 2:length(memory.distributionMin)
x = memory.distributionMin[i]
colorN = colors[i]
z = plot!(
x,
y,
marker = true,
color = colorN,
label = "line " * settings.allLines[i],
)
end
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
savefig(
z,
settings.outputFilePath *
"\\Plots\\all_lines_deviation(min)_" *
dateString *
".pdf",
)
return z
end
function plotBarsMultQuant(df, settings, memory)
pyplot()
stations = Array{Int,1}()
depOrArr = Array{String,1}()
quantile = Array{Float64,1}()
type = Array{String,1}()
stationList = Any[]
for index = 1:length(df.station)
push!(stationList, df.station[index] * "_" * df.DepOrArr[index])
for numberQuant = 1:length(settings.quantile)
push!(stations, index)
currentColumn = df[!, "quantile"*string(numberQuant)]
push!(quantile, currentColumn[index])
push!(type, settings.quantile[numberQuant] * "% quantile")
end
end
df_new =
DataFrame(station = (stations), quantile = (quantile), type = (type))
memory.stationList = copy(stationList)
x = plotBars(df_new, memory, settings)
x = plot!(
df.AverageDelay,
linewidth = 3,
linecolor = :blue,
marker = true,
label = "Average",
)
return x
end
function plotBarsDays(df, settings, memory)
pyplot()
stations = Array{Int,1}()
depOrArr = Array{String,1}()
quantile = Array{Float64,1}()
type = Array{String,1}()
stationList = Any[]
for index = 1:length(df.station)
push!(stationList, df.station[index] * "_" * df.DepOrArr[index])
for day in settings.timePeriod
push!(stations, index)
currentColumn = df[!, "quantile"*settings.quantile[1]*"_"*day]
push!(quantile, currentColumn[index])
push!(type, day)
end
end
df_new =
DataFrame(station = (stations), quantile = (quantile), type = (type))
memory.stationList = copy(stationList)
x = plotBars(df_new, memory, settings)
x = plot!(
df[!, "average_"*settings.timePeriod[1]],
linewidth = 3,
linecolor = :orange,
label = "Average Delay " * settings.timePeriod[1],
)
x = plot!(
df[!, "average_"*settings.timePeriod[2]],
linewidth = 3,
linecolor = :blue,
label = "Average Delay " * settings.timePeriod[2],
)
return x
end
function plotBarsLineSection(df, settings, memory)
pyplot()
x = bar(
df.median,
xticks = ([1:1:length(df.points);], df.points),
yticks = ((0-270):30:210),
legend = false,
title = memory.title,
ylabel = memory.yLabel,
xlabel = memory.xLabel,
size = (800, 300),
bar_width = 1.0,
xtickfontrotation = memory.xRotation,
)
return x
end
function plotDistributionInSec(df1, df2, settings, memory)
mean1 = (mean(df1.quantile1) + mean(df2.quantile1)) / 2
mean2 = (mean(df1.quantile2) + mean(df2.quantile2)) / 2
mean3 = (mean(df1.quantile3) + mean(df2.quantile3)) / 2
mean4 = (mean(df1.quantile4) + mean(df2.quantile4)) / 2
pyplot()
y = Any[]
for quantile in settings.quantile
push!(y, parse(Int, quantile))
end
x = [mean1, mean2, mean3, mean4]
# annual average; source: DB
a = [3 * 60, 6 * 60]
b = [84.9, 95.8]
z = plot(
a,
b,
title = memory.title,
label = "average deviation september/october 2017",
xlabel = "Deviation (sec)",
ylabel = "Quantile",
marker = true,
legend = :bottomright,
color = :red,
)
z = plot!(
x,
y,
marker = true,
label = "Deviation Line" * settings.analyzedLine,
)
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
savefig(
z,
settings.outputFilePath *
"\\Plots\\Line_" *
settings.analyzedLine *
"_Deviation(sec)_" *
dateString *
".pdf",
)
end
function plotDistributionInMin(df1, df2, settings, memory)
mean1 = (mean(df1.quantile1) + mean(df2.quantile1)) / 2 / 60
mean2 = (mean(df1.quantile2) + mean(df2.quantile2)) / 2 / 60
mean3 = (mean(df1.quantile3) + mean(df2.quantile3)) / 2 / 60
mean4 = (mean(df1.quantile4) + mean(df2.quantile4)) / 2 / 60
pyplot()
y = Any[]
for quantile in settings.quantile
push!(y, parse(Int, quantile))
end
x = [mean1, mean2, mean3, mean4]
push!(memory.distributionMin, x)
# annual average; source: DB
a = [3, 6]
b = [84.9, 95.8]
z = plot(
a,
b,
title = memory.title,
label = "average deviation september/october 2017",
xlabel = "Deviation (min)",
ylabel = "Quantile",
marker = true,
legend = :bottomright,
color = :red,
)
z = plot!(
x,
y,
marker = true,
label = "Deviation " * memory.focus,
color = :blue,
)
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
savefig(
z,
settings.outputFilePath *
"\\Plots\\" *
memory.title *
dateString *
".pdf",
)
end
function plotRailwayNetworkDistr(df1, df2, settings)
mean1 = (mean(df1.quantile1) + mean(df2.quantile1)) / 2 / 60
mean2 = (mean(df1.quantile2) + mean(df2.quantile2)) / 2 / 60
mean3 = (mean(df1.quantile3) + mean(df2.quantile3)) / 2 / 60
mean4 = (mean(df1.quantile4) + mean(df2.quantile4)) / 2 / 60
pyplot()
y = Any[]
for quantile in settings.quantile
push!(y, parse(Int, quantile))
end
x = [mean1, mean2, mean3, mean4]
z = plot(
x,
y,
title = settings.objectInFocus * " - Distribution",
label = "Deviation " * settings.objectInFocus,
xlabel = "Deviation (min)",
ylabel = "Quantile",
marker = true,
legend = :bottomright,
)
# average for September and October; source: DB
a = [3, 6]
b = [84.9, 95.8]
z = plot!(a, b, marker = true, label = "Average Deviation 2017")
dateString = Dates.format(Dates.now(), "yyyy-mm-dd_HH.MM.SS")
savefig(
z,
settings.outputFilePath *
"\\Plots\\" *
settings.objectInFocus *
"_Deviation(min)_" *
dateString *
".pdf",
)
end
function plotBarsGameOrRushHour(df, settings, memory)
pyplot()
stations = Array{Int,1}()
depOrArr = Array{String,1}()
quantile = Array{Float64,1}()
type = Array{String,1}()
stationList = Any[]
for index = 1:length(df.station)
push!(stationList, df.station[index] * "_" * df.DepOrArr[index])
for indicator in ["yes", "no"]
push!(stations, index)
currentColumn = df[!, "quantile"*settings.quantile[1]*"_"*indicator]
push!(quantile, currentColumn[index])
push!(type, indicator)
end
end
newType = Any[]
if settings.timePeriod[1] == "rush hour"
label1 = "Rush Hour"
label2 = "'Normal' Time"
elseif settings.timePeriod[1] == "match day"
label1 = "Match Day"
label2 = "'Normal' Day"
end
for x in type
if x == "yes"
push!(newType, label1)
else
push!(newType, label2)
end
end
df_new =
DataFrame(station = (stations), quantile = (quantile), type = (newType))
memory.size = (2000, 1000)
memory.stationList = copy(stationList)
x = plotBars(df_new, memory, settings)
x = plot!(
df[!, "average_yes"],
linewidth = 3,
linecolor = :orange,
label = "Average Delay " * label1,
)
x = plot!(
df[!, "average_no"],
linewidth = 3,
linecolor = :blue,
label = "Average Delay " * label2,
)
return x
end
function plotBars(df, memory, settings)
if settings.analyzedLine == "11" &&
memory.title == "Line 11 - Direction Herrenberg"
memory.yTicks = ((0-240):120:3000)
end
x = groupedbar(
df.quantile,
xticks = ([1:1:size(memory.stationList, 1);], memory.stationList),
yticks = memory.yTicks,
group = df.type,
ylabel = memory.yLabel,
xlabel = memory.xLabel,
title = memory.title,
size = memory.size,
bar_width = memory.barwidth,
linewidth = memory.linewidth,
tickfontsize = memory.tickSize,
legendfontsize = memory.legendSize,
guidefontsize = memory.guidefontsize,
titlefontsize = memory.titleSize,
xtickfontrotation = memory.xRotation,
legend = memory.legendPosition,
)
return x
end
end

readFile.jl Normal file

@ -0,0 +1,14 @@
module readFile
include("./checkPerformance.jl")
using .performance
print("\n")
settings =
"C:\\Users\\its\\Documents\\Ana\\UNI\\Bachelor\\DataAnalyzing\\settings.yaml"
startAnalyzing(settings)
end

registrationPoints.jl Normal file

@ -0,0 +1,875 @@
# approach 1: deviation is analyzed for each registration point
module registrationPoints
include("./output.jl")
using Statistics, CSV, Dates, DataFrames, StatsBase
using .output
export analyzeStatisticalVariation, allDataQuantile, top1, top100
##
function analyzeStatisticalVariation(allPerfData, settings)
quantileD1, quantileD2 = calculateQuantiles(allPerfData, settings)
return quantileD1, quantileD2
end
"""
Function is preparing the new dataframes with the stations which are supposed to
be analyzed.
"""
function calculateQuantiles(perfData, settings)
if settings.objectInFocus == "single line"
lineNr = 1
else
lineNr = findall(x -> x == settings.analyzedLine, settings.allLines)
lineNr = lineNr[1]
end
stationList = settings.stationLists[lineNr]
dataDirection1 = DataFrame()
dataDirection1[!, :station] = stationList
dataDirection1[!, :DepOrArr] = fill("D", size(dataDirection1, 1))
dataDirection2 = DataFrame()
dataDirection2[!, :station] = reverse(stationList)
dataDirection2[!, :DepOrArr] = fill("D", size(dataDirection2, 1))
# sorting the performance data by directions
perfDataDirection1 = perfData[
perfData[:, :ZUGEREIGNIS_RICHTUNG].==stationList[size(stationList, 1)],
:,
]
perfDataDirection2 =
perfData[perfData[:, :ZUGEREIGNIS_RICHTUNG].==stationList[1], :]
if settings.timePeriod[1] == "no"
dataDirection1 = calculateQuantileForDirection(
dataDirection1,
perfDataDirection1,
settings,
)
dataDirection2 = calculateQuantileForDirection(
dataDirection2,
perfDataDirection2,
settings,
)
elseif settings.timePeriod[1] == "match day" &&
settings.analyzedLine != "11"
dataDirection1 = calculateQuantileForGame(
dataDirection1,
perfDataDirection1,
settings,
)
dataDirection2 = calculateQuantileForGame(
dataDirection2,
perfDataDirection2,
settings,
)
elseif settings.timePeriod[1] == "rush hour" &&
settings.analyzedLine != "11"
dataDirection1 = calculateQuantileForTimePeriod(
dataDirection1,
perfDataDirection1,
settings,
)
dataDirection2 = calculateQuantileForTimePeriod(
dataDirection2,
perfDataDirection2,
settings,
)
elseif settings.analyzedLine != "11"
dataDirection1 = calculateQuantileForDay(
dataDirection1,
perfDataDirection1,
settings,
)
dataDirection2 = calculateQuantileForDay(
dataDirection2,
perfDataDirection2,
settings,
)
end
return dataDirection1, dataDirection2
end
"""
Function is calculating the selected quantiles for each registration point for
both directions.
"""
function calculateQuantileForDirection(
dataDirection1,
perfDataDirection1,
settings,
)
deviationArray = Any[] # for deviation shown with quantile
marker = 1 # to make clear if railway object is arriving or departing
# registration points with no data are being deleted
dataDirection1 = deleteEmptyStations(dataDirection1, perfDataDirection1)
for station in eachrow(dataDirection1)
stationPerfData = perfDataDirection1[
perfDataDirection1[:, :ZUGEREIGNIS_DS100].==station.station,
:,
]
if length(stationPerfData.ZUGEREIGNIS_DS100) == 0
dataDirection1 =
dataDirection1[dataDirection1[:, :station].!=station.station, :]
marker = 0
else
if iseven(marker)
station.DepOrArr = "A"
end
deviationStation = Any[]
totalDeviation = 0
for row in eachrow(stationPerfData)
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 || row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 || row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation, deviation)
end
end
end
push!(deviationArray, deviationStation)
end
marker += 1
end
totalDeviationArray = Any[]
trainRuns = Any[]
quantileNr = 1
for quantile in settings.quantile
quantileLine = Any[]
for row in deviationArray
x = quantile!(row, parse(Float64, quantile) / 100)
push!(quantileLine, x)
if quantileNr == 1
y = mean(row)
push!(totalDeviationArray, y)
z = size(row, 1)
push!(trainRuns, z)
end
end
nameColumn = "quantile" * string(quantileNr)
dataDirection1[!, nameColumn] = quantileLine
quantileNr += 1
end
dataDirection1[!, :AverageDelay] = totalDeviationArray
dataDirection1[!, :TrainRuns] = trainRuns
#saveDataFrame(dataDirection1, settings, "dataframe")
return dataDirection1
end
"""
Function is calculating the selected quantiles for each registration point for
both directions. Only the two selected days are being checked.
"""
function calculateQuantileForDay(dataDirection1, perfDataDirection1, settings)
if size(perfDataDirection1, 1) != 0
deviationArray1 = Any[] #for deviation shown with quantile
totalDeviationArray1 = Any[] #for average deviation
deviationArray2 = Any[] #for deviation shown with quantile
totalDeviationArray2 = Any[] #for average deviation
marker = 1 #to make clear if railway object is arriving or departing
for station in eachrow(dataDirection1)
stationPerfData = perfDataDirection1[
perfDataDirection1[:, :ZUGEREIGNIS_DS100].==station.station,
:,
]
if iseven(marker)
station.DepOrArr = "A"
end
deviationStation1 = Any[]
deviationStation2 = Any[]
for row in eachrow(stationPerfData)
if row.dayname == settings.timePeriod[1]
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 ||
row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation1, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 ||
row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation1, deviation)
end
end
elseif row.dayname == settings.timePeriod[2]
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 ||
row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation2, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 ||
row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation2, deviation)
end
end
#println(station.station)
#println(deviation)
end
end
if length(deviationStation1) == 0
deviationStation1 = [0]
elseif length(deviationStation2) == 0
deviationStation2 = [0]
end
push!(deviationArray1, deviationStation1)
push!(deviationArray2, deviationStation2)
marker += 1
end
######################################################
d = 1
for day in settings.timePeriod
if d == 1
deviationArray = deviationArray1
else
deviationArray = deviationArray2
end
dayTime = Any[]
averageDay = Any[]
trainRuns = Any[]
for row in deviationArray
x = quantile!(row, parse(Float64, settings.quantile[1]) / 100)
push!(dayTime, x)
y = mean(row)
push!(averageDay, y)
z = size(row, 1)
if z == 1
z = 0
end #stations with no data
push!(trainRuns, z)
end
nameColumn1 = "quantile" * settings.quantile[1] * "_" * day
dataDirection1[!, nameColumn1] = dayTime
nameColumn1 = "average_" * day
dataDirection1[!, nameColumn1] = averageDay
dataDirection1[!, "train runs "*day] = trainRuns
d += 1
end
#saveDataFrame(dataDirection1, settings, "dataframe")
return dataDirection1
else
println("There is no data for this direction and this line.")
data = DataFrame()
return data
end
end
"""
Function is calculating the selected quantiles for each registration point for
both directions. Match days are compared with "normal" days.
"""
function calculateQuantileForGame(dataDirection1, perfDataDirection1, settings)
if size(perfDataDirection1, 1) != 0
deviationArray1 = Any[] #for deviation shown with quantile
totalDeviationArray1 = Any[] #for average deviation
deviationArray2 = Any[] #for deviation shown with quantile
totalDeviationArray2 = Any[] #for average deviation
marker = 1 #to make clear if railway object is arriving or departing
for station in eachrow(dataDirection1)
stationPerfData = perfDataDirection1[
perfDataDirection1[:, :ZUGEREIGNIS_DS100].==station.station,
:,
]
if iseven(marker)
station.DepOrArr = "A"
end
deviationStation1 = Any[]
deviationStation2 = Any[]
for row in eachrow(stationPerfData)
if row.day in settings.gamedays
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 ||
row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation1, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 ||
row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation1, deviation)
end
end
else
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 ||
row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation2, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 ||
row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation2, deviation)
end
end
#println(station.station)
#println(deviation)
end
end
if length(deviationStation1) == 0
deviationStation1 = [0]
elseif length(deviationStation2) == 0
deviationStation2 = [0]
end
push!(deviationArray1, deviationStation1)
push!(deviationArray2, deviationStation2)
marker += 1
end
######################################################
d = 1
for game in ["yes", "no"]
if d == 1
deviationArray = deviationArray1
else
deviationArray = deviationArray2
end
dayTime = Any[]
averageDay = Any[]
for row in deviationArray
x = quantile!(row, parse(Float64, settings.quantile[1]) / 100)
push!(dayTime, x)
y = mean(row)
push!(averageDay, y)
end
nameColumn1 = "quantile" * settings.quantile[1] * "_" * game
dataDirection1[!, nameColumn1] = dayTime
nameColumn1 = "average_" * game
dataDirection1[!, nameColumn1] = averageDay
d += 1
end
return dataDirection1
else
println("There is no data for this direction and this line.")
data = DataFrame()
return data
end
end
"""
Function is calculating the selected quantiles for each registration point for
both directions. Only registration points within the selected time period are
being checked.
"""
function calculateQuantileForTimePeriod(
dataDirection1,
perfDataDirection1,
settings,
)
if size(perfDataDirection1, 1) != 0
deviationArray1 = Any[] #for deviation shown with quantile
totalDeviationArray1 = Any[] #for average deviation
deviationArray2 = Any[] #for deviation shown with quantile
totalDeviationArray2 = Any[] #for average deviation
marker = 1 #to make clear if railway object is arriving or departing
for station in eachrow(dataDirection1)
stationPerfData = perfDataDirection1[
perfDataDirection1[:, :ZUGEREIGNIS_DS100].==station.station,
:,
]
if iseven(marker)
station.DepOrArr = "A"
end
deviationStation1 = Any[]
deviationStation2 = Any[]
for row in eachrow(stationPerfData)
if row.rushHour == "yes"
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 ||
row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation1, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 ||
row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation1, deviation)
end
end
else
deviation = 0
if iseven(marker)
if row.ZUGEREIGNIS_TYP == 20 ||
row.ZUGEREIGNIS_TYP == 50
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation2, deviation)
end
elseif !iseven(marker)
if row.ZUGEREIGNIS_TYP == 10 ||
row.ZUGEREIGNIS_TYP == 40
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(deviationStation2, deviation)
end
end
#println(station.station)
#println(deviation)
end
end
if length(deviationStation1) == 0
deviationStation1 = [0]
elseif length(deviationStation2) == 0
deviationStation2 = [0]
end
push!(deviationArray1, deviationStation1)
push!(deviationArray2, deviationStation2)
marker += 1
end
######################################################
d = 1
for rushHour in ["yes", "no"]
if d == 1
deviationArray = deviationArray1
else
deviationArray = deviationArray2
end
time = Any[]
average = Any[]
for row in deviationArray
x = quantile!(row, parse(Float64, settings.quantile[1]) / 100)
push!(time, x)
y = mean(row)
push!(average, y)
end
nameColumn1 = "quantile" * settings.quantile[1] * "_" * rushHour
dataDirection1[!, nameColumn1] = time
nameColumn1 = "average_" * rushHour
dataDirection1[!, nameColumn1] = average
d += 1
end
return dataDirection1
else
println("There is no data for this direction and this line.")
data = DataFrame()
return data
end
end
"""
Stations with no data at all are deleted from the station list and the list
is being modified.
"""
function deleteEmptyStations(dataDirection1, perfDataDirection1)
lengthData = size(dataDirection1.station, 1)
for station in eachrow(dataDirection1)
stationPerfData = perfDataDirection1[
perfDataDirection1[:, :ZUGEREIGNIS_DS100].==station.station,
:,
]
if length(stationPerfData.ZUGEREIGNIS_DS100) == 0
dataDirection1 =
dataDirection1[dataDirection1[:, :station].!=station.station, :]
end
end
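# if an odd number of stations was removed, the departure/arrival alternation (based on even/
# uneven position) would be shifted; the first remaining station is dropped to restore it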
marker = lengthData - size(dataDirection1.station, 1)
if !iseven(marker) && marker != 0
stationList = (dataDirection1.station)
popfirst!(stationList)
dataDirection = DataFrame()
dataDirection[!, :station] = stationList
dataDirection[!, :DepOrArr] = fill("D", size(dataDirection, 1))
return dataDirection
else
return dataDirection1
end
end
"""
Function creates a "black list" for each week and saves how often each train
number and train number-station combination is represented in the weekly list.
"""
function top100(df1, settings)
# only departures are being analyzed
perfData = df1[df1[:, :ZUGEREIGNIS_TYP].==40, :]
x = df1[df1[:, :ZUGEREIGNIS_TYP].==10, :]
append!(perfData, x)
# first and final day of analysis; each week is being checked
finalDay = Dates.Date("11.10.2017", "dd.mm.yyyy")
firstDay = Dates.Date("01.09.2017", "dd.mm.yyyy")
lastDay = (Dates.Date(firstDay) + Dates.Week(1) - Dates.Day(1))
d = firstDay:Dates.Day(1):lastDay
fDreached = false # is the final day already reached?
topAll = Any[]
topNum = Any[]
while fDreached == false
d = firstDay:Dates.Day(1):lastDay
#println(d)
#println(size(perfData, 1))
week = filter(
row ->
Dates.Date(row.ZUGEREIGNIS_SOLLZEIT, "dd.mm.yyyy HH:MM") in d,
perfData,
)
devA = Any[]
deviation = 0
for row in eachrow(week)
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(devA, deviation / 60)
end
week[!, :deviation] = devA
# for each week the data is being sorted by deviation
sort!(week, :deviation, rev = true)
i = 1
numbers = unique(week.ZUGEREIGNIS_ZUGNUMMER)
# for lowest deviation:
#reverse!(numbers)
#sort!(week, :deviation, rev = false)
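# keep the 25 events (station/train number) with the highest deviation of the week as well as
# the first 25 distinct train numbers of the sorted weekly data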
for row in eachrow(week)
if i <= 25
push!(topNum, numbers[i])
i += 1
push!(
topAll,
row.ZUGEREIGNIS_DS100 *
"," *
string(row.ZUGEREIGNIS_ZUGNUMMER),
)
end
end
saveOutput(week, settings)
fDreached = (finalDay in d)
firstDay = lastDay + Dates.Day(1)
lastDay = firstDay + Dates.Week(1) - Dates.Day(1)
end
df = DataFrame(countmap(topAll))
#println(countmap(topAll))
#println(countmap(topNum))
df3 = countmap(topNum)
saveOutput(df, settings)
sleep(1)
saveOutput(df3, settings)
end
"""
A single train number is being analyzed.
"""
function top1(df, settings)
# single train number is being analyzed
perfData = df[df[:, :ZUGEREIGNIS_ZUGNUMMER].==parse(Int, settings.mode), :]
#perfData = perfData[perfData[:, :ZUGEREIGNIS_DS100] .== "TRX",:]
y = perfData[perfData[:, :ZUGEREIGNIS_TYP].==40, :]
x = perfData[perfData[:, :ZUGEREIGNIS_TYP].==10, :]
perfData = append!(y, x)
devA = Any[]
for row in eachrow(perfData)
deviation = Dates.value(
Second(
convert(
Dates.Second,
Dates.DateTime(
row.ZUGEREIGNIS_ISTZEIT,
"dd.mm.yyyy HH:MM",
) - Dates.DateTime(
row.ZUGEREIGNIS_SOLLZEIT,
"dd.mm.yyyy HH:MM",
),
),
),
)
push!(devA, deviation)
end
med = median(devA)
av = mean(devA)
println("Median: "*string(med))
println("Average :"*string(av))
println("Train Runs: "*string(length(devA)))
#println(settings.mode)
end
end

settings.yaml Normal file

@ -0,0 +1,33 @@
---
settings:
outputFilePath: "C:\\Users\\its\\Documents\\Ana\\UNI\\Bachelor\\DataAnalyzing\\output\\"
stationsListPath: "C:\\Users\\its\\Documents\\Ana\\UNI\\Bachelor\\DataAnalyzing\\S-BahnStuttgartDaten\\Bahnhofsdaten.csv"
estimatedTimesPath: "C:\\Users\\its\\Documents\\Ana\\UNI\\Bachelor\\DataAnalyzing\\S-BahnStuttgartDaten\\20170901-20171019_Alle_Sollereignisse_S-Bahn_Stuttgart.csv"
realTimeDataPath: "C:\\Users\\its\\Documents\\Ana\\UNI\\Bachelor\\DataAnalyzing\\S-BahnStuttgartDaten\\20170901-20171019_Alle_Istmeldungen_S-Bahn_Stuttgart.csv"
#objectInFocus: "single line"
objectInFocus: "all lines" #single line is being repeated
timePeriod: ["no"]
#timePeriod: ["rush hour","16.00","19.00","06.00","09.00"]
#timePeriod: ["match day"] #(Linie 11)
#timePeriod: ["Thursday","Sunday"] #always 2 to compare
#timePeriod: ["Thursday","Friday"]
mode: "statistical variation"
#mode: "black list"
#mode: "7048"
#approach: "between registration points"
approach: "registration points"
quantile: ["84","90","96","99"]
#quantile: ["50","70","80","90"]
#quantile: ["88"]
analyzedLine: "4"
#allLines: ["1","2","3","4","5","6","60","11"]
allLines: ["1","2","3","4","5","6","60"]
#allLines: ["1","4","6","60","11"]
#allLines: ["1","4","6","60"]
...

types.jl Normal file

@ -0,0 +1,75 @@
module types
using Dates
export Settings, Memory
mutable struct Settings
outputFilePath::String
objectInFocus::String
analyzedLine::String
estimatedTimesPath::String
realTimeDataPath::String
stationsListPath::String
mode::String
allLines::Vector{String}
stationDict::Dict{Any, Any}
totallines::Int
stationLists::Vector{Any}
commonStations::Vector{Any}
quantile::Vector{Any}
timePeriod::Vector{String}
approach::String
gamedays::Vector{Any}
end
Settings() = Settings("", "", "", "", "", "", "", [], Dict(), 0, [], [], [], [], "", [])
mutable struct Memory
distributionMin::Vector{Any}
title::String
titleSize::Int
stationList::Vector{Any}
focus::String
direction1::String
direction2::String
size::Tuple{Any,Any}
legendSize::Int
legendPosition::Symbol
linewidth::Int
barwidth::Float64
tickSize::Int
guidefontsize::Int
xRotation::Float64
xLabel::String
yTicks::StepRange{Any,Any}
yLabel::String
color::Array
end
Memory() = Memory(
[],
"",
0,
["x","x"],
"",
"",
"",
(200, 100),
0,
:outerbottom,
0,
0.0,
0,
0,
0.0,
"",
0:1:2,
"",
[:blue],
)
end