Home Up PDF Prof. Dr. Ingo Claßen
SQL - Window Functions - DSML

Data - Table t03

Part 1 - 2022

Part 2 - 2023

Part 3 - 2024

Aggregation

  • partition by: whole partition
  • order by: begin up to current line
  • partition by order by: begin partition up to current line in partition

Different Kinds of Aggregation

select
  yyyy,
  mm,
  v1,
  sum(v1) over (partition by yyyy order by mm) as cum_year,
  sum(v1) over (partition by yyyy) as year,
  sum(v1) over (order by yyyy, mm) as cum_all,
  sum(v1) over () as all
from t03
where yyyy in ('2023', '2024') and mm in ('01', '02', '03', '04')
order by yyyy, mm;;

Percent of Total

select
  yyyy,
  mm,
  v1,
  sum(v1) over (partition by yyyy) as year,
  round((v1 / sum(v1) over (partition by yyyy)) * 100, 2)
    as percent_month_of_year
from t03
where yyyy in ('2023', '2024') 
order by yyyy, mm;

Peers

order by: begin up to last peer

select
  yyyy,
  mm,
  v2,
  sum(v2) over (order by v2) as vv2
from t03
where yyyy in ('2024')
order by v2;

Rank

Different Kinds of Ranking

select
  yyyy,
  mm,
  v1,
  rank() over (order by v1 desc) as rank,
  dense_rank() over (order by v1 desc) as dense_rank,
  row_number() over (order by v1 desc) as lineno
from t03
where yyyy in ('2024')
order by rank;

Different Order of Output

select
  yyyy,
  mm,
  v1,
  rank() over (order by v1 desc) as rank,
  dense_rank() over (order by v1 desc) as dense_rank
from t03
where yyyy in ('2024')
order by yyyy, mm;

Ranking within Partitions

select
  yyyy,
  mm,
  v1,
  rank() over (partition by yyyy order by v1 desc) as rank,
  dense_rank() over (partition by yyyy order by v1 desc) as dense_rank,
  row_number() over (partition by yyyy order by v1 desc) as lineno
from t03
where yyyy in ('2023', '2024');

Moving Averages

select
  yyyy,
  mm,
  v1,
  round(avg(v1) over (
    order by mm rows between 1 preceding and 1 following),2)
    as moving_avg
from t03
where yyyy in ('2022')
order by yyyy, mm;

Restriction of window

  • UNBOUNDED PRECEDING
  • offset PRECEDING
  • CURRENT ROW
  • offset FOLLOWING
  • UNBOUNDED FOLLOWING

Positioning

Column mm

select
  mm,
  v1,
  lag(mm) over (order by mm) as one_before,
  lag(mm, 4) over (order by mm) as four_before,
  first_value(mm) over (order by mm) as first,
  last_value(mm) over (order by mm rows between 
    unbounded preceding and unbounded following) as last,
  nth_value(mm, 3) over (order by mm rows between 
    unbounded preceding and unbounded following) as third
from t03
where yyyy in ('2024')
order by mm;

Column v1

select
  mm,
  v1,
  lag(v1) over (order by mm) as one_before,
  lag(v1, 4) over (order by mm) as four_before,
  first_value(v1) over (order by mm) as first,
  last_value(v1) over (order by mm rows between 
    unbounded preceding and unbounded following) as last,
  nth_value(v1, 3) over (order by mm rows between 
    unbounded preceding and unbounded following) as third
from t03
where yyyy in ('2024')
order by mm;

Year-over-Year

select
  yyyy,
  mm,
  lag(yyyy || '-' || mm) 
    over (partition by mm order by yyyy, mm) as previous_year,
  lag(yyyy || '-' || mm, 2) 
    over (partition by mm order by yyyy, mm) as two_years_before
from t03
where mm in ('01', '02', '08')
order by yyyy, mm;

Distributions

Select
  yyyy,
  mm,
  v3,
  row_number() over (order by v3) as nr,
  ntile(6) over (order by v3) as bucket,
  round(cume_dist() over (order by v3), 2)
    as distribution
from t03
where yyyy in ('2024')
order by v3;

ntile(6): dividing into 6 equal buckets

cume_dist: cumulative distribution: number of rows or peers preceeding / total number of rows