Lecture 18#

Announcements#

  • Homework #5 is now out, due after exam#2

  • Second optional lecture exercise is out later today or tomorrow, due on friday.

  • Exam #2 is next monday, November 10. It is open book, open notes as before. Nothing electronic is allowed.

    • It covers everything we have done in class since Exam #1, yes including ER hierarchies!

Today’s lecture#

  • Views

  • Other SQL constructs

  • Object relational extensions

load_ext sql
%config SqlMagic.displaylimit = None
%sql --section baking
displaylimit: Value None will be treated as 0 (no limit)
Connecting to 'baking'
%%sql

with finalists as 
    (select baker from bakers 
     where baker not in (select baker from results 
                        where result = 'eliminated'))
select
    f.baker
    , count(distinct r.baker) as numwins
    , min(t.rank) as besttechnical
    , max(t.rank) as worsttechnical
from 
    finalists f
    left join results r
    on f.baker = r.baker and r.result = 'star baker'
    , technicals t
where     
    f.baker = t.baker 
group by 
    f.baker
Running query in 'baking'
3 rows affected.
baker numwins besttechnical worsttechnical
Kim-Joy 1 1 10
Rahul 1 1 7
Ruby 1 1 10
%%sql 

select
    b.baker
    , count(distinct r.baker) as numwins
    , min(t.rank) as besttechnical
    , max(t.rank) as worsttechnical
from 
    bakers b
    left join results r
    on b.baker = r.baker and r.result = 'star baker'
    , technicals t
where     
    b.baker = t.baker 
    and b.baker not in (select baker from results 
                        where result ='eliminated')
group by 
    b.baker
Running query in 'baking'
3 rows affected.
baker numwins besttechnical worsttechnical
Kim-Joy 1 1 10
Rahul 1 1 7
Ruby 1 1 10
%%sql

create or replace view finalists(baker, fullname, age) as
select 
   baker 
   , fullname
   , age
from 
   bakers
where
   baker not in (select baker from results 
                 where result = 'eliminated');
Running query in 'baking'
%%sql 
select
    f.baker
    , f.fullname
    , count(distinct r.baker) as numwins
    , min(t.rank) as besttechnical
    , max(t.rank) as worsttechnical
from 
    finalists f
    left join results r
    on f.baker = r.baker and r.result = 'star baker'
    , technicals t
where     
    f.baker = t.baker 
group by 
    f.baker, f.fullname;
Running query in 'baking'
3 rows affected.
baker fullname numwins besttechnical worsttechnical
Kim-Joy Kim-Joy Hewlett 1 1 10
Rahul Rahul Mandal 1 1 7
Ruby Ruby Bhogal 1 1 10

Views#

Views are virtual tables. They do not contain tuples, just query definition.

  • You can use views in SQL SELECT queries

    • Query definition of the view is inserted into your query and optimized together.

    • Write once (correctly!) and execute many times.

    • Use views to define access control.

  • You can sometimes use a view to INSERT/UPDATE/DELETE underlying table.

    • It is only possible if the view is updatable, which means

    • There is a single relation in the from of the view

    • The view does not have distinct or group by

  • If the view does not match the functions it is used for in other queries, it may increase the cost of your queries.

%%sql

-- not updatable
create view olderages(age) as
select distinct age from bakers ;
Running query in 'baking'
RuntimeError: (psycopg2.errors.DuplicateTable) relation "olderages" already exists

[SQL: create view olderages(age) as
select distinct age from bakers ;]
(Background on this error at: https://sqlalche.me/e/20/f405)
If you need help solving this issue, send us a message: https://ploomber.io/community
%%sql

create view olderbakers(baker, age) as
select baker, age
from bakers
where age > 35 ;

-- Updateable and even can change age of people!
Running query in 'baking'
%%sql 

drop view olderbakers ;

create view olderbakers(baker, age) as
select baker, age
from bakers
where age > 35 
with check option;
Running query in 'baking'
%%sql

-- CASE Statement for implementing IF

select
    baker
    , 'young' as age
from 
    bakers 
where 
    age <= 35 
union
select baker, 'old' as age from bakers where age>35
order by
    age asc;
Running query in 'baking'
12 rows affected.
baker age
Karen old
Terry old
Dan old
Jon old
Antony young
Manon young
Imelda young
Briony young
Luke young
Kim-Joy young
Rahul young
Ruby young
%%sql

select 
    baker
    , case when age>=35 then 'old' else 'young' end as age
from
    bakers 
order by
     age asc;
Running query in 'baking'
12 rows affected.
baker age
Jon old
Terry old
Dan old
Karen old
Antony young
Rahul young
Ruby young
Kim-Joy young
Manon young
Briony young
Imelda young
Luke young
%%sql 

create table tmp(id int);
insert into tmp values(1);
insert into tmp values(null);
insert into tmp values(null);
insert into tmp values(3);
insert into tmp values(4);
Running query in 'baking'
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
%%sql

select avg(id) from tmp ;
Running query in 'baking'
1 rows affected.
avg
2.6666666666666667
%%sql

select avg(case when id is null then 0 else id end) from tmp ;
Running query in 'baking'
1 rows affected.
avg
1.6000000000000000

Grouping functions#

  • Group by

    • GROUPING SETS ((day),(time),());

    • Cube (day, time)

  • Aggregate over a partition (without grouping)

    • OVER (partition by day)

  • Filter aggregates in select

    • sum(attr) filter (where attr>X)

%sql --section class
Connecting and switching to connection 'class'
%%sql 

drop table if exists events;

CREATE TABLE events (
    name     VARCHAR(10)
    , day    VARCHAR(10)
    , time   VARCHAR(10)
    , price  INT
) ;

INSERT INTO events VALUES ('sitting','M','12:00',5);
INSERT INTO events VALUES ('reading','W','2:00',10);
INSERT INTO events VALUES ('sleeping','M','2:00',12);
INSERT INTO events VALUES ('hopping','W','12:00',8);
INSERT INTO events VALUES ('jumping','M','4:00',22);
Running query in 'class'
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
%%sql 

-- name, day,time,price

select 
    day, time, avg(price)
from 
    events
group by 
     grouping sets ((day), (time),()) ;
Running query in 'class'
5 rows affected.
day time avg
W None 9.0000000000000000
M None 13.0000000000000000
None 4:00 22.0000000000000000
None 12:00 6.5000000000000000
None 2:00 11.0000000000000000

Cube (x,y,z): (x,y,z), (x,y), (x,z), (y,z), (x),(y),(z),()

Cube (x,y): (x,y), (x),(y),()

Rollup (x,y,z): (x,y,z), (x,y), (x), ()

%%sql

select 
    day, time, avg(price)
from 
    events
group by 
    cube(day,time) ;
Running query in 'class'
11 rows affected.
day time avg
None None 11.4000000000000000
M 2:00 12.0000000000000000
M 12:00 5.0000000000000000
W 12:00 8.0000000000000000
W 2:00 10.0000000000000000
M 4:00 22.0000000000000000
W None 9.0000000000000000
M None 13.0000000000000000
None 4:00 22.0000000000000000
None 12:00 6.5000000000000000
None 2:00 11.0000000000000000
%%sql

select 
    name,day, time, avg(price)
from 
    events
group by 
    rollup(name,day,time) ;
Running query in 'class'
16 rows affected.
name day time avg
None None None 11.4000000000000000
sitting M 12:00 5.0000000000000000
reading W 2:00 10.0000000000000000
sleeping M 2:00 12.0000000000000000
hopping W 12:00 8.0000000000000000
jumping M 4:00 22.0000000000000000
hopping W None 8.0000000000000000
sleeping M None 12.0000000000000000
sitting M None 5.0000000000000000
reading W None 10.0000000000000000
jumping M None 22.0000000000000000
sleeping None None 12.0000000000000000
reading None None 10.0000000000000000
hopping None None 8.0000000000000000
jumping None None 22.0000000000000000
sitting None None 5.0000000000000000
%%sql

select 
   name
   , day
   , time
   , price
   , sum(price) over (partition by day) 
from events ;
Running query in 'class'
5 rows affected.
name day time price sum
sitting M 12:00 5 39
sleeping M 2:00 12 39
jumping M 4:00 22 39
reading W 2:00 10 18
hopping W 12:00 8 18
%%sql

with sums as (select day, sum(price) as totalprice from events group by day)
select
    e.*, s.totalprice
from  events e, sums s
where e.day = s.day;
Running query in 'class'
5 rows affected.
name day time price totalprice
sitting M 12:00 5 39
reading W 2:00 10 18
sleeping M 2:00 12 39
hopping W 12:00 8 18
jumping M 4:00 22 39

Recursion!#

  • Implemented using WITH statement

    - Write one query using base tables only!
    - Write a recursive query using the relation being defined and base relations.
    

anc(X,Y) :- parent(X,Y) anc(X,Z) :- anc(X,Y), parent(Y,Z)

%%sql 

DROP TABLE if exists parents ;

CREATE TABLE parents (
     parent   varchar(100)
     , child  varchar(100)
) ;


INSERT INTO parents VALUES('Dakota','Madison');
INSERT INTO parents VALUES('Madison','Ava');
INSERT INTO parents VALUES('Madison','Sophia');
INSERT INTO parents VALUES('Sophia','Noah');
INSERT INTO parents VALUES('Noah','Emma');
Running query in 'class'
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
%%sql 

-- anc(X,Y) :- parent(X,Y)
-- anc(X,Z) :- anc(X,Y), parent(Y,Z)

with recursive anc as (
    select parent, child, 1 as degree from parents
    union
    select a.parent, p.child, a.degree+1
    from anc a, parents p
    where a.child = p.parent 
)
select * from anc 
order by degree asc;
Running query in 'class'
12 rows affected.
parent child degree
Dakota Madison 1
Madison Ava 1
Madison Sophia 1
Sophia Noah 1
Noah Emma 1
Dakota Sophia 2
Dakota Ava 2
Madison Noah 2
Sophia Emma 2
Dakota Noah 3
Madison Emma 3
Dakota Emma 4
%%sql

-- Hierarchies

CREATE TABLE cities (
   name            text  primary key
   , population      int
   , altitude        int     -- in feet
);
     
CREATE TABLE capitals (
   state           char(2)
) INHERITS (cities);

insert into cities values('New York City', 8175133, 33);
insert into cities values('Syracuse', 145170, 380);
insert into capitals values('Albany', 97856, 312, 'NY');
Running query in 'class'
1 rows affected.
1 rows affected.
1 rows affected.
%%sql

select * from capitals;
Running query in 'class'
1 rows affected.
name population altitude state
Albany 97856 312 NY
%%sql

select * from cities ;
Running query in 'class'
3 rows affected.
name population altitude
New York City 8175133 33
Syracuse 145170 380
Albany 97856 312
%%sql

select * from only(cities);
Running query in 'class'
2 rows affected.
name population altitude
New York City 8175133 33
Syracuse 145170 380