私のメインのDBMSはTeradataですが、これはOracleでもそのまま機能します。
WITH all_meas AS
( -- get a distinct list of all from/to points
SELECT road_id, from_meas AS meas
FROM road_events
UNION
SELECT road_id, to_meas
FROM road_events
)
-- select * from all_meas order by 1,2
, all_ranges AS
( -- create from/to ranges
SELECT road_id, meas AS from_meas
,Lead(meas)
Over (PARTITION BY road_id
ORDER BY meas) AS to_meas
FROM all_meas
)
-- SELECT * from all_ranges order by 1,2
, all_event_ranges AS
( -- now match the ranges to the event ranges
SELECT
ar.*
,re.event_id
,re.year
,re.total_road_length
,ar.to_meas - ar.from_meas AS event_length
-- used to filter the latest event as multiple events might cover the same range
,Row_Number()
Over (PARTITION BY ar.road_id, ar.from_meas
ORDER BY year DESC) AS rn
FROM all_ranges ar
JOIN road_events re
ON ar.road_id = re.road_id
AND ar.from_meas < re.to_meas
AND ar.to_meas > re.from_meas
WHERE ar.to_meas IS NOT NULL
)
SELECT event_id, road_id, year, total_road_length, Sum(event_length)
FROM all_event_ranges
WHERE rn = 1 -- latest year only
GROUP BY event_id, road_id, year, total_road_length
ORDER BY road_id, year DESC;
実際にカバーされているfrom/to_meas
を返す必要がある場合 (編集前の質問のように)、それはもっと複雑かもしれません。最初の部分は同じですが、集計を行わないと、クエリは同じevent_idを持つ隣接する行を返すことができます(例:イベント3:0-1&1-25):
SELECT * FROM all_event_ranges
WHERE rn = 1
ORDER BY road_id, from_meas;
隣接する行をマージする場合は、さらに2つの手順が必要です(標準的なアプローチを使用して、グループの最初の行にフラグを付け、グループ番号を計算します):
WITH all_meas AS
(
SELECT road_id, from_meas AS meas
FROM road_events
UNION
SELECT road_id, to_meas
FROM road_events
)
-- select * from all_meas order by 1,2
, all_ranges AS
(
SELECT road_id, meas AS from_meas
,Lead(meas)
Over (PARTITION BY road_id
ORDER BY meas) AS to_meas
FROM all_meas
)
-- SELECT * from all_ranges order by 1,2
, all_event_ranges AS
(
SELECT
ar.*
,re.event_id
,re.year
,re.total_road_length
,ar.to_meas - ar.from_meas AS event_length
,Row_Number()
Over (PARTITION BY ar.road_id, ar.from_meas
ORDER BY year DESC) AS rn
FROM all_ranges ar
JOIN road_events re
ON ar.road_id = re.road_id
AND ar.from_meas < re.to_meas
AND ar.to_meas > re.from_meas
WHERE ar.to_meas IS NOT NULL
)
-- SELECT * FROM all_event_ranges WHERE rn = 1 ORDER BY road_id, from_meas
, adjacent_events AS
( -- assign 1 to the 1st row of an event
SELECT t.*
,CASE WHEN Lag(event_id)
Over(PARTITION BY road_id
ORDER BY from_meas) = event_id
THEN 0
ELSE 1
END AS flag
FROM all_event_ranges t
WHERE rn = 1
)
-- SELECT * FROM adjacent_events ORDER BY road_id, from_meas
, grouped_events AS
( -- assign a groupnumber to adjacent rows using a Cumulative Sum over 0/1
SELECT t.*
,Sum(flag)
Over (PARTITION BY road_id
ORDER BY from_meas
ROWS Unbounded Preceding) AS grp
FROM adjacent_events t
)
-- SELECT * FROM grouped_events ORDER BY road_id, from_meas
SELECT event_id, road_id, year, Min(from_meas), Max(to_meas), total_road_length, Sum(event_length)
FROM grouped_events
GROUP BY event_id, road_id, grp, year, total_road_length
ORDER BY 2, Min(from_meas);
編集:
Ups、ブログを見つけました重複する範囲と優先度 いくつかの単純化されたOracle構文でまったく同じことを行います。実際、クエリをTeradataの他の簡略化された構文からStandard /OracleSQLに変換しました:-)