Вот начало. Когда я получу больше времени, я добавлю последние три столбца. Это выглядит сложным, но я использую lubridate
, чтобы включить длительности в промежутки времени. Существует функция, называемая new_interval
, которая их создает, а одна называется int_overlaps
, которая тестирует наложения.
Update
код завершения. Проверьте, помогает ли это или нет.
library(lubridate)
starts <- as.POSIXct(df$begin, origin=Sys.time())
ends <- as.POSIXct(df$end, origin=Sys.time())
spans <- new_interval(starts, ends)
s <- split(spans, df$label)
d <- split(df, df$label)
overlap <- function(x1, x2) {
out <- sapply(1:length(s[[x1]]), function(x) {
which(int_overlaps(s[[x1]][x], s[[x2]]))}
)
mat_lst <- lapply(out, function(x) {
matrix(c(d[[x2]]$begin[x],d[[x2]]$end[x]),ncol=2)}
)
mat_lst[lengths(mat_lst) == 0L] <- list(matrix(NA, ncol=2))
mat_lst
}
lh <- overlap("lower", "head")
lu <- overlap("lower", "upper")
matches <- suppressWarnings(lapply(1:nrow(d$lower), function(x) {
cbind(d$lower[x,2:3], lu[[x]], lh[[x]])}
))
new_df <- `names<-`(do.call(rbind, matches), c("lower.begin", "lower.end", "upper.begin", "upper.end", "head.begin", "head.end"))
rownames(new_df) <- NULL
#n.overlaps
count <- colSums(apply(new_df, 1, function(x) !is.na(x)))/2
new_df$n.overlaps <- ave(count, new_df$lower.begin, FUN=function(x) x+length(x)-1)
#overlap.labels
new_df$overlap.labels <- apply(new_df[1:6], 1, function(x)
paste(unique(gsub("\\..*", "", names(which(!is.na(x))))), collapse="|"))
#overlap.duration
first <- pmin(new_df$lower.end, new_df$upper.end)-new_df$upper.begin
second <- pmin(new_df$lower.end, new_df$head.end)-new_df$head.begin
overlap <- ifelse(is.na(first+second), ifelse(is.na(first), second, first), first+second)
new_df$overlap.duration <- ave(overlap, new_df$lower.begin, FUN=sum)
new_df
# lower.begin lower.end upper.begin upper.end head.begin head.end n.overlaps
# 1 9.030 12.410 10.358 11.958 9.918 14.818 3
# 2 28.773 29.975 28.132 30.432 29.823 30.623 4
# 3 28.773 29.975 29.975 33.895 29.823 30.623 4
# 4 33.895 35.992 29.975 33.895 NA NA 3
# 5 33.895 35.992 30.532 46.716 NA NA 3
# 6 46.814 48.854 46.716 50.820 NA NA 2
# 7 58.510 61.510 58.536 61.482 58.802 61.404 4
# 8 58.510 61.510 51.633 58.536 61.404 63.562 4
# 9 62.971 63.491 NA NA 61.404 63.562 2
# overlap.labels overlap.duration
# 1 lower|upper|head 4.092
# 2 lower|upper|head 2.147
# 3 lower|upper|head 2.147
# 4 lower|upper 9.380
# 5 lower|upper 9.380
# 6 lower|upper 2.138
# 7 lower|upper|head 12.557
# 8 lower|upper|head 12.557
# 9 lower|head 2.087
Update # 2
Я нарядная функцию matches
. Он должен быть готов к большему разнообразию. Замените его в старом сценарии.
matches <- suppressWarnings(lapply(1:nrow(d$lower), function(x) {
max.len <- max(length(c(length(lu[[x]]), length(lh[[x]]))))
xu <- lu[[x]]
xh <- lh[[x]]
dim(xu) <- dim(xh) <- NULL
length(xu) <- length(xh) <- max.len
umat <- matrix(xu, byrow=T, ncol=2)
hmat <- matrix(xh, byrow=T, ncol=2)
cbind(d$lower[x,2:3], umat, hmat)}
))
данных
df <- structure(list(label = c("lower", "lower", "lower", "lower",
"lower", "lower", "upper", "upper", "upper", "upper", "upper",
"upper", "upper", "upper", "head", "head", "head", "head"
), begin = c(9.03, 28.773, 33.895, 46.814, 58.51, 62.971, 28.132,
46.716, 58.536, 29.975, 53.376, 10.358, 30.532, 51.633, 9.918,
29.823, 58.802, 61.404), end = c(12.41, 29.975, 35.992, 48.854,
61.51, 63.491, 30.432, 50.82, 61.482, 33.895, 54.08, 11.958,
46.716, 58.536, 14.818, 30.623, 61.404, 63.562)), .Names = c("label",
"begin", "end"), class = "data.frame", row.names = c(NA, -18L))
Я добавил правильное графическое представление таблицы. Возможно, было бы полезно понять таблицу, которую я хотел бы иметь. – user5264244