1
0
Fork 0
Auswertung_Archiv-Daten_S-B.../registrationPoints.jl

876 lines
31 KiB
Julia

# approach 1: deviation is analyzed for each registration point
module registrationPoints
include("./output.jl")
using Statistics, CSV, Dates, DataFrames, StatsBase
using .output
export analyzeStatisticalVariation, allDataQuantile, top1, top100
##
"""
    analyzeStatisticalVariation(allPerfData, settings)

Entry point of approach 1: hand `allPerfData` and `settings` to
`calculateQuantiles` and return the two per-direction results it produces.
"""
function analyzeStatisticalVariation(allPerfData, settings)
    perDirection = calculateQuantiles(allPerfData, settings)
    return perDirection[1], perDirection[2]
end
"""
Prepare one data frame per direction containing the stations to be analyzed and
fill both with the evaluation selected by `settings.timePeriod`.
"""
function calculateQuantiles(perfData, settings)
    # Position of the analyzed line within `settings.allLines`; a single-line
    # analysis always uses the first station list.
    lineNr = if settings.objectInFocus == "single line"
        1
    else
        findall(x -> x == settings.analyzedLine, settings.allLines)[1]
    end
    stationList = settings.stationLists[lineNr]

    # Every station starts out flagged as departure ("D").
    function stationFrame(stations)
        frame = DataFrame()
        frame[!, :station] = stations
        frame[!, :DepOrArr] = fill("D", size(frame, 1))
        return frame
    end
    forward = stationFrame(stationList)
    backward = stationFrame(reverse(stationList))

    # sorting the performance data by directions: direction 1 runs towards the
    # last station of the list, direction 2 towards the first one
    directionColumn = perfData[:, :ZUGEREIGNIS_RICHTUNG]
    perfForward = perfData[directionColumn .== stationList[end], :]
    perfBackward = perfData[directionColumn .== stationList[1], :]

    # Pick the evaluation once and apply it to both directions. Line "11" is
    # only evaluated without a time period; otherwise the frames are returned
    # without any statistics columns.
    period = settings.timePeriod[1]
    evaluate = if period == "no"
        calculateQuantileForDirection
    elseif settings.analyzedLine == "11"
        nothing
    elseif period == "match day"
        calculateQuantileForGame
    elseif period == "rush hour"
        calculateQuantileForTimePeriod
    else
        calculateQuantileForDay
    end

    if evaluate !== nothing
        forward = evaluate(forward, perfForward, settings)
        backward = evaluate(backward, perfBackward, settings)
    end
    return forward, backward
end
"""
Calculate the selected quantiles, the average delay and the number of train runs
for each registration point of one direction. Registration points without data
are removed first.
"""
function calculateQuantileForDirection(
    dataDirection1,
    perfDataDirection1,
    settings,
)
    # Arguments:
    #   dataDirection1     - frame with the columns `station` and `DepOrArr`
    #   perfDataDirection1 - performance data of the same direction
    #   settings           - `settings.quantile` holds the quantile levels in
    #                        percent, given as strings
    # Returns the cleaned station frame extended by the columns `quantile1`,
    # `quantile2`, ... (one per entry of `settings.quantile`), `AverageDelay`
    # and `TrainRuns`. Delays are given in seconds.

    # Build the format once instead of re-parsing the format string per row.
    timeFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")
    # Delay of a single event: actual minus scheduled time, in seconds.
    delaySeconds(row) = Dates.value(
        convert(
            Dates.Second,
            Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, timeFormat) -
            Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, timeFormat),
        ),
    )

    # registration points with no data are being deleted
    dataDirection1 = deleteEmptyStations(dataDirection1, perfDataDirection1)

    stationColumn = perfDataDirection1[:, :ZUGEREIGNIS_DS100]
    delaysPerStation = Vector{Int}[]
    for (position, station) in enumerate(eachrow(dataDirection1))
        # Even positions of the station list are evaluated as arrivals (event
        # types 20/50), odd positions as departures (event types 10/40).
        isArrival = iseven(position)
        if isArrival
            station.DepOrArr = "A"
        end
        eventTypes = isArrival ? (20, 50) : (10, 40)
        stationPerfData =
            perfDataDirection1[stationColumn .== station.station, :]
        delays = Int[
            delaySeconds(row) for row in eachrow(stationPerfData) if
            row.ZUGEREIGNIS_TYP in eventTypes
        ]
        push!(delaysPerStation, delays)
    end

    # A station can have rows but none of the expected event type. Such a
    # station is reported with 0 for every statistic instead of aborting the
    # whole evaluation on an empty vector.
    for (quantileNr, level) in enumerate(settings.quantile)
        probability = parse(Float64, level) / 100
        dataDirection1[!, "quantile" * string(quantileNr)] = Float64[
            isempty(delays) ? 0.0 : quantile!(delays, probability) for
            delays in delaysPerStation
        ]
    end
    dataDirection1[!, :AverageDelay] = Float64[
        isempty(delays) ? 0.0 : mean(delays) for delays in delaysPerStation
    ]
    dataDirection1[!, :TrainRuns] = Int[length(delays) for delays in delaysPerStation]
    return dataDirection1
end
"""
Calculate the first selected quantile, the average delay and the number of train
runs for each registration point of one direction, separately for each of the
selected days.
"""
function calculateQuantileForDay(dataDirection1, perfDataDirection1, settings)
    # Arguments:
    #   dataDirection1     - frame with the columns `station` and `DepOrArr`
    #   perfDataDirection1 - performance data of the same direction; the
    #                        column `dayname` is compared with the day names
    #   settings           - `settings.timePeriod` holds the day names,
    #                        `settings.quantile[1]` the quantile level in
    #                        percent (as string)
    # Returns `dataDirection1` extended by the columns `quantile<level>_<day>`,
    # `average_<day>` and `train runs <day>` for every selected day, or an
    # empty `DataFrame` if there is no performance data. Delays are in seconds.
    if size(perfDataDirection1, 1) == 0
        println("There is no data for this direction and this line.")
        return DataFrame()
    end

    # Build the format once instead of re-parsing the format string per row.
    timeFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")
    # Delay of a single event: actual minus scheduled time, in seconds.
    delaySeconds(row) = Dates.value(
        convert(
            Dates.Second,
            Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, timeFormat) -
            Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, timeFormat),
        ),
    )

    stationColumn = perfDataDirection1[:, :ZUGEREIGNIS_DS100]
    delaysDay1 = Vector{Int}[]
    delaysDay2 = Vector{Int}[]
    for (position, station) in enumerate(eachrow(dataDirection1))
        # Even positions of the station list are evaluated as arrivals (event
        # types 20/50), odd positions as departures (event types 10/40).
        isArrival = iseven(position)
        if isArrival
            station.DepOrArr = "A"
        end
        eventTypes = isArrival ? (20, 50) : (10, 40)
        stationPerfData =
            perfDataDirection1[stationColumn .== station.station, :]

        stationDay1 = Int[]
        stationDay2 = Int[]
        for row in eachrow(stationPerfData)
            if row.dayname == settings.timePeriod[1]
                bucket = stationDay1
            elseif row.dayname == settings.timePeriod[2]
                bucket = stationDay2
            else
                continue
            end
            if row.ZUGEREIGNIS_TYP in eventTypes
                push!(bucket, delaySeconds(row))
            end
        end
        push!(delaysDay1, stationDay1)
        push!(delaysDay2, stationDay2)
    end

    # Stations without data for a day are reported with 0 for every statistic.
    # The two days are handled independently, so a station without any data no
    # longer aborts the evaluation, and a station with exactly one train run
    # is counted as one run instead of being mistaken for "no data".
    level = parse(Float64, settings.quantile[1]) / 100
    for (index, day) in enumerate(settings.timePeriod)
        delaysPerStation = index == 1 ? delaysDay1 : delaysDay2
        dataDirection1[!, "quantile" * settings.quantile[1] * "_" * day] = Float64[
            isempty(delays) ? 0.0 : quantile!(delays, level) for
            delays in delaysPerStation
        ]
        dataDirection1[!, "average_" * day] = Float64[
            isempty(delays) ? 0.0 : mean(delays) for delays in delaysPerStation
        ]
        dataDirection1[!, "train runs " * day] =
            Int[length(delays) for delays in delaysPerStation]
    end
    return dataDirection1
end
"""
Calculate the first selected quantile and the average delay for each
registration point of one direction, separately for match days ("yes") and all
other days ("no").
"""
function calculateQuantileForGame(dataDirection1, perfDataDirection1, settings)
    # Arguments:
    #   dataDirection1     - frame with the columns `station` and `DepOrArr`
    #   perfDataDirection1 - performance data of the same direction; the
    #                        column `day` is looked up in `settings.gamedays`
    #   settings           - `settings.quantile[1]` holds the quantile level in
    #                        percent (as string)
    # Returns `dataDirection1` extended by the columns `quantile<level>_yes`,
    # `average_yes`, `quantile<level>_no` and `average_no`, or an empty
    # `DataFrame` if there is no performance data. Delays are in seconds.
    if size(perfDataDirection1, 1) == 0
        println("There is no data for this direction and this line.")
        return DataFrame()
    end

    # Build the format once instead of re-parsing the format string per row.
    timeFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")
    # Delay of a single event: actual minus scheduled time, in seconds.
    delaySeconds(row) = Dates.value(
        convert(
            Dates.Second,
            Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, timeFormat) -
            Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, timeFormat),
        ),
    )

    stationColumn = perfDataDirection1[:, :ZUGEREIGNIS_DS100]
    delaysGameDay = Vector{Int}[]
    delaysOtherDay = Vector{Int}[]
    for (position, station) in enumerate(eachrow(dataDirection1))
        # Even positions of the station list are evaluated as arrivals (event
        # types 20/50), odd positions as departures (event types 10/40).
        isArrival = iseven(position)
        if isArrival
            station.DepOrArr = "A"
        end
        eventTypes = isArrival ? (20, 50) : (10, 40)
        stationPerfData =
            perfDataDirection1[stationColumn .== station.station, :]

        stationGameDay = Int[]
        stationOtherDay = Int[]
        for row in eachrow(stationPerfData)
            bucket = row.day in settings.gamedays ? stationGameDay : stationOtherDay
            if row.ZUGEREIGNIS_TYP in eventTypes
                push!(bucket, delaySeconds(row))
            end
        end
        push!(delaysGameDay, stationGameDay)
        push!(delaysOtherDay, stationOtherDay)
    end

    # Stations without data in a group are reported with 0. Both groups are
    # handled independently, so a station without any data no longer aborts
    # the evaluation on an empty vector.
    level = parse(Float64, settings.quantile[1]) / 100
    for (game, delaysPerStation) in
        zip(("yes", "no"), (delaysGameDay, delaysOtherDay))
        dataDirection1[!, "quantile" * settings.quantile[1] * "_" * game] = Float64[
            isempty(delays) ? 0.0 : quantile!(delays, level) for
            delays in delaysPerStation
        ]
        dataDirection1[!, "average_" * game] = Float64[
            isempty(delays) ? 0.0 : mean(delays) for delays in delaysPerStation
        ]
    end
    return dataDirection1
end
"""
Calculate the first selected quantile and the average delay for each
registration point of one direction, separately for events inside ("yes") and
outside ("no") the rush hour.
"""
function calculateQuantileForTimePeriod(
    dataDirection1,
    perfDataDirection1,
    settings,
)
    # Arguments:
    #   dataDirection1     - frame with the columns `station` and `DepOrArr`
    #   perfDataDirection1 - performance data of the same direction; rows with
    #                        `rushHour == "yes"` form the rush-hour group
    #   settings           - `settings.quantile[1]` holds the quantile level in
    #                        percent (as string)
    # Returns `dataDirection1` extended by the columns `quantile<level>_yes`,
    # `average_yes`, `quantile<level>_no` and `average_no`, or an empty
    # `DataFrame` if there is no performance data. Delays are in seconds.
    if size(perfDataDirection1, 1) == 0
        println("There is no data for this direction and this line.")
        return DataFrame()
    end

    # Build the format once instead of re-parsing the format string per row.
    timeFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")
    # Delay of a single event: actual minus scheduled time, in seconds.
    delaySeconds(row) = Dates.value(
        convert(
            Dates.Second,
            Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, timeFormat) -
            Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, timeFormat),
        ),
    )

    stationColumn = perfDataDirection1[:, :ZUGEREIGNIS_DS100]
    delaysRushHour = Vector{Int}[]
    delaysOffPeak = Vector{Int}[]
    for (position, station) in enumerate(eachrow(dataDirection1))
        # Even positions of the station list are evaluated as arrivals (event
        # types 20/50), odd positions as departures (event types 10/40).
        isArrival = iseven(position)
        if isArrival
            station.DepOrArr = "A"
        end
        eventTypes = isArrival ? (20, 50) : (10, 40)
        stationPerfData =
            perfDataDirection1[stationColumn .== station.station, :]

        stationRushHour = Int[]
        stationOffPeak = Int[]
        for row in eachrow(stationPerfData)
            bucket = row.rushHour == "yes" ? stationRushHour : stationOffPeak
            if row.ZUGEREIGNIS_TYP in eventTypes
                push!(bucket, delaySeconds(row))
            end
        end
        push!(delaysRushHour, stationRushHour)
        push!(delaysOffPeak, stationOffPeak)
    end

    # Stations without data in a group are reported with 0. Both groups are
    # handled independently, so a station without any data no longer aborts
    # the evaluation on an empty vector.
    level = parse(Float64, settings.quantile[1]) / 100
    for (rushHour, delaysPerStation) in
        zip(("yes", "no"), (delaysRushHour, delaysOffPeak))
        dataDirection1[!, "quantile" * settings.quantile[1] * "_" * rushHour] = Float64[
            isempty(delays) ? 0.0 : quantile!(delays, level) for
            delays in delaysPerStation
        ]
        dataDirection1[!, "average_" * rushHour] = Float64[
            isempty(delays) ? 0.0 : mean(delays) for delays in delaysPerStation
        ]
    end
    return dataDirection1
end
"""
Stations with no data at all are deleted from the station list. If an odd
number of stations was deleted, the first remaining station is dropped as well
and the station list is rebuilt.
"""
function deleteEmptyStations(dataDirection1, perfDataDirection1)
    # Arguments:
    #   dataDirection1     - frame with the columns `station` and `DepOrArr`
    #   perfDataDirection1 - performance data; a station counts as "with data"
    #                        if it occurs in the column `ZUGEREIGNIS_DS100`
    # Returns `dataDirection1` itself if nothing had to be removed, otherwise a
    # new frame without the empty stations.

    # One pass over the performance data instead of filtering it per station.
    observedStations = Set(perfDataDirection1[:, :ZUGEREIGNIS_DS100])
    keep = Bool[station in observedStations for station in dataDirection1.station]
    removed = count(!, keep)
    if removed == 0
        return dataDirection1
    end

    remaining = dataDirection1[keep, :]
    if iseven(removed)
        return remaining
    end

    # An odd number of removed stations additionally drops the first remaining
    # station and resets every flag to "D".
    # NOTE(review): presumably this keeps the alternating departure/arrival
    # order of the list intact - confirm.
    # Slicing instead of `popfirst!` also covers the case that no station is
    # left at all, which used to raise an error.
    dataDirection = DataFrame()
    dataDirection[!, :station] = remaining.station[2:end]
    dataDirection[!, :DepOrArr] = fill("D", size(dataDirection, 1))
    return dataDirection
end
"""
Create a "black list" of the most delayed departures for each week and save how
often each train number and each station/train number combination appears in the
weekly lists.
"""
function top100(
    df1,
    settings;
    firstDay = Dates.Date(2017, 9, 1),
    finalDay = Dates.Date(2017, 10, 11),
    topCount = 25,
)
    # Arguments:
    #   df1      - performance data
    #   settings - passed on unchanged to `saveOutput`
    # Keywords (defaults reproduce the previously hard-coded values):
    #   firstDay - first day of the first analyzed week
    #   finalDay - the week containing this day is the last one analyzed
    #   topCount - number of entries taken from each weekly list
    # Returns whatever the last `saveOutput` call returns.
    # Throws an `ArgumentError` if `finalDay` lies before `firstDay`.
    firstDay <= finalDay ||
        throw(ArgumentError("firstDay must not be after finalDay"))

    # Build the format once instead of re-parsing the format string per row.
    timeFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")
    # Delay of a single event in minutes (actual minus scheduled time).
    delayMinutes(row) =
        Dates.value(
            convert(
                Dates.Second,
                Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, timeFormat) -
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, timeFormat),
            ),
        ) / 60

    # only departures are being analyzed (type 40 rows first, then type 10)
    perfData = df1[df1[:, :ZUGEREIGNIS_TYP] .== 40, :]
    append!(perfData, df1[df1[:, :ZUGEREIGNIS_TYP] .== 10, :])

    # The scheduled day of every event is parsed once, not once per week.
    scheduledDay =
        [Dates.Date(s, timeFormat) for s in perfData.ZUGEREIGNIS_SOLLZEIT]

    # Kept untyped so that `countmap` yields the same dictionary types as before.
    topAll = Any[]
    topNum = Any[]
    weekStart = firstDay
    while weekStart <= finalDay
        weekEnd = weekStart + Dates.Week(1) - Dates.Day(1)
        inWeek = Bool[weekStart <= day <= weekEnd for day in scheduledDay]
        week = perfData[inWeek, :]
        week[!, :deviation] = Float64[delayMinutes(row) for row in eachrow(week)]
        # for each week the data is being sorted by deviation
        sort!(week, :deviation, rev = true)

        # Distinct train numbers in order of their largest delay. A week can
        # contain fewer distinct train numbers than top rows, so both lists are
        # limited separately.
        numbers = unique(week.ZUGEREIGNIS_ZUGNUMMER)
        append!(topNum, numbers[1:min(topCount, length(numbers))])
        for row in Iterators.take(eachrow(week), topCount)
            push!(
                topAll,
                row.ZUGEREIGNIS_DS100 * "," * string(row.ZUGEREIGNIS_ZUGNUMMER),
            )
        end

        saveOutput(week, settings)
        weekStart = weekEnd + Dates.Day(1)
    end

    df = DataFrame(countmap(topAll))
    df3 = countmap(topNum)
    saveOutput(df, settings)
    # NOTE(review): presumably the pause keeps the two outputs from colliding
    # (e.g. time-based file names) - confirm against `saveOutput`.
    sleep(1)
    return saveOutput(df3, settings)
end
"""
Analyze a single train number: print the median and the average delay of its
departures and the number of train runs.
"""
function top1(df, settings)
    # Arguments:
    #   df       - performance data
    #   settings - `settings.mode` holds the train number as a string
    # Prints median and average delay (in seconds) and the number of train
    # runs; returns `nothing`.

    # Build the format once instead of re-parsing the format string per row.
    timeFormat = Dates.DateFormat("dd.mm.yyyy HH:MM")

    # single train number is being analyzed
    trainNumber = parse(Int, settings.mode)
    perfData = df[df[:, :ZUGEREIGNIS_ZUGNUMMER] .== trainNumber, :]
    # Only event types 40 and 10 are evaluated; their order is irrelevant for
    # the statistics below.
    departures = perfData[in.(perfData[:, :ZUGEREIGNIS_TYP], Ref((40, 10))), :]

    delays = Int[
        Dates.value(
            convert(
                Dates.Second,
                Dates.DateTime(row.ZUGEREIGNIS_ISTZEIT, timeFormat) -
                Dates.DateTime(row.ZUGEREIGNIS_SOLLZEIT, timeFormat),
            ),
        ) for row in eachrow(departures)
    ]

    # `median` and `mean` are undefined for an empty collection; report the
    # missing train runs instead of failing.
    if isempty(delays)
        println("Train Runs: 0")
        return nothing
    end

    println("Median: " * string(median(delays)))
    println("Average :" * string(mean(delays)))
    println("Train Runs: " * string(length(delays)))
    return nothing
end
end