Lecture 18#
Announcements#
Homework #5 is now out, due after exam#2
Second optional lecture exercise is out later today or tomorrow, due on friday.
Exam #2 is next monday, November 10. It is open book, open notes as before. Nothing electronic is allowed.
It covers everything we have done in class since Exam #1, yes including ER hierarchies!
Today’s lecture#
Views
Other SQL constructs
Object relational extensions
load_ext sql
%config SqlMagic.displaylimit = None
%sql --section baking
%%sql
with finalists as
(select baker from bakers
where baker not in (select baker from results
where result = 'eliminated'))
select
f.baker
, count(distinct r.baker) as numwins
, min(t.rank) as besttechnical
, max(t.rank) as worsttechnical
from
finalists f
left join results r
on f.baker = r.baker and r.result = 'star baker'
, technicals t
where
f.baker = t.baker
group by
f.baker
| baker | numwins | besttechnical | worsttechnical |
|---|---|---|---|
| Kim-Joy | 1 | 1 | 10 |
| Rahul | 1 | 1 | 7 |
| Ruby | 1 | 1 | 10 |
%%sql
select
b.baker
, count(distinct r.baker) as numwins
, min(t.rank) as besttechnical
, max(t.rank) as worsttechnical
from
bakers b
left join results r
on b.baker = r.baker and r.result = 'star baker'
, technicals t
where
b.baker = t.baker
and b.baker not in (select baker from results
where result ='eliminated')
group by
b.baker
| baker | numwins | besttechnical | worsttechnical |
|---|---|---|---|
| Kim-Joy | 1 | 1 | 10 |
| Rahul | 1 | 1 | 7 |
| Ruby | 1 | 1 | 10 |
%%sql
create or replace view finalists(baker, fullname, age) as
select
baker
, fullname
, age
from
bakers
where
baker not in (select baker from results
where result = 'eliminated');
%%sql
select
f.baker
, f.fullname
, count(distinct r.baker) as numwins
, min(t.rank) as besttechnical
, max(t.rank) as worsttechnical
from
finalists f
left join results r
on f.baker = r.baker and r.result = 'star baker'
, technicals t
where
f.baker = t.baker
group by
f.baker, f.fullname;
| baker | fullname | numwins | besttechnical | worsttechnical |
|---|---|---|---|---|
| Kim-Joy | Kim-Joy Hewlett | 1 | 1 | 10 |
| Rahul | Rahul Mandal | 1 | 1 | 7 |
| Ruby | Ruby Bhogal | 1 | 1 | 10 |
Views#
Views are virtual tables. They do not contain tuples, just query definition.
You can use views in SQL SELECT queries
Query definition of the view is inserted into your query and optimized together.
Write once (correctly!) and execute many times.
Use views to define access control.
You can sometimes use a view to INSERT/UPDATE/DELETE underlying table.
It is only possible if the view is updatable, which means
There is a single relation in the from of the view
The view does not have distinct or group by
If the view does not match the functions it is used for in other queries, it may increase the cost of your queries.
%%sql
-- not updatable
create view olderages(age) as
select distinct age from bakers ;
RuntimeError: (psycopg2.errors.DuplicateTable) relation "olderages" already exists
[SQL: create view olderages(age) as
select distinct age from bakers ;]
(Background on this error at: https://sqlalche.me/e/20/f405)
If you need help solving this issue, send us a message: https://ploomber.io/community
%%sql
create view olderbakers(baker, age) as
select baker, age
from bakers
where age > 35 ;
-- Updateable and even can change age of people!
%%sql
drop view olderbakers ;
create view olderbakers(baker, age) as
select baker, age
from bakers
where age > 35
with check option;
%%sql
-- CASE Statement for implementing IF
select
baker
, 'young' as age
from
bakers
where
age <= 35
union
select baker, 'old' as age from bakers where age>35
order by
age asc;
| baker | age |
|---|---|
| Karen | old |
| Terry | old |
| Dan | old |
| Jon | old |
| Antony | young |
| Manon | young |
| Imelda | young |
| Briony | young |
| Luke | young |
| Kim-Joy | young |
| Rahul | young |
| Ruby | young |
%%sql
select
baker
, case when age>=35 then 'old' else 'young' end as age
from
bakers
order by
age asc;
| baker | age |
|---|---|
| Jon | old |
| Terry | old |
| Dan | old |
| Karen | old |
| Antony | young |
| Rahul | young |
| Ruby | young |
| Kim-Joy | young |
| Manon | young |
| Briony | young |
| Imelda | young |
| Luke | young |
%%sql
create table tmp(id int);
insert into tmp values(1);
insert into tmp values(null);
insert into tmp values(null);
insert into tmp values(3);
insert into tmp values(4);
%%sql
select avg(id) from tmp ;
| avg |
|---|
| 2.6666666666666667 |
%%sql
select avg(case when id is null then 0 else id end) from tmp ;
| avg |
|---|
| 1.6000000000000000 |
Grouping functions#
Group by
GROUPING SETS ((day),(time),());
Cube (day, time)
Aggregate over a partition (without grouping)
OVER (partition by day)
Filter aggregates in select
sum(attr) filter (where attr>X)
%sql --section class
%%sql
drop table if exists events;
CREATE TABLE events (
name VARCHAR(10)
, day VARCHAR(10)
, time VARCHAR(10)
, price INT
) ;
INSERT INTO events VALUES ('sitting','M','12:00',5);
INSERT INTO events VALUES ('reading','W','2:00',10);
INSERT INTO events VALUES ('sleeping','M','2:00',12);
INSERT INTO events VALUES ('hopping','W','12:00',8);
INSERT INTO events VALUES ('jumping','M','4:00',22);
%%sql
-- name, day,time,price
select
day, time, avg(price)
from
events
group by
grouping sets ((day), (time),()) ;
| day | time | avg |
|---|---|---|
| W | None | 9.0000000000000000 |
| M | None | 13.0000000000000000 |
| None | 4:00 | 22.0000000000000000 |
| None | 12:00 | 6.5000000000000000 |
| None | 2:00 | 11.0000000000000000 |
Cube (x,y,z): (x,y,z), (x,y), (x,z), (y,z), (x),(y),(z),()
Cube (x,y): (x,y), (x),(y),()
Rollup (x,y,z): (x,y,z), (x,y), (x), ()
%%sql
select
day, time, avg(price)
from
events
group by
cube(day,time) ;
| day | time | avg |
|---|---|---|
| None | None | 11.4000000000000000 |
| M | 2:00 | 12.0000000000000000 |
| M | 12:00 | 5.0000000000000000 |
| W | 12:00 | 8.0000000000000000 |
| W | 2:00 | 10.0000000000000000 |
| M | 4:00 | 22.0000000000000000 |
| W | None | 9.0000000000000000 |
| M | None | 13.0000000000000000 |
| None | 4:00 | 22.0000000000000000 |
| None | 12:00 | 6.5000000000000000 |
| None | 2:00 | 11.0000000000000000 |
%%sql
select
name,day, time, avg(price)
from
events
group by
rollup(name,day,time) ;
| name | day | time | avg |
|---|---|---|---|
| None | None | None | 11.4000000000000000 |
| sitting | M | 12:00 | 5.0000000000000000 |
| reading | W | 2:00 | 10.0000000000000000 |
| sleeping | M | 2:00 | 12.0000000000000000 |
| hopping | W | 12:00 | 8.0000000000000000 |
| jumping | M | 4:00 | 22.0000000000000000 |
| hopping | W | None | 8.0000000000000000 |
| sleeping | M | None | 12.0000000000000000 |
| sitting | M | None | 5.0000000000000000 |
| reading | W | None | 10.0000000000000000 |
| jumping | M | None | 22.0000000000000000 |
| sleeping | None | None | 12.0000000000000000 |
| reading | None | None | 10.0000000000000000 |
| hopping | None | None | 8.0000000000000000 |
| jumping | None | None | 22.0000000000000000 |
| sitting | None | None | 5.0000000000000000 |
%%sql
select
name
, day
, time
, price
, sum(price) over (partition by day)
from events ;
| name | day | time | price | sum |
|---|---|---|---|---|
| sitting | M | 12:00 | 5 | 39 |
| sleeping | M | 2:00 | 12 | 39 |
| jumping | M | 4:00 | 22 | 39 |
| reading | W | 2:00 | 10 | 18 |
| hopping | W | 12:00 | 8 | 18 |
%%sql
with sums as (select day, sum(price) as totalprice from events group by day)
select
e.*, s.totalprice
from events e, sums s
where e.day = s.day;
| name | day | time | price | totalprice |
|---|---|---|---|---|
| sitting | M | 12:00 | 5 | 39 |
| reading | W | 2:00 | 10 | 18 |
| sleeping | M | 2:00 | 12 | 39 |
| hopping | W | 12:00 | 8 | 18 |
| jumping | M | 4:00 | 22 | 39 |
Recursion!#
Implemented using WITH statement
- Write one query using base tables only! - Write a recursive query using the relation being defined and base relations.
anc(X,Y) :- parent(X,Y) anc(X,Z) :- anc(X,Y), parent(Y,Z)
%%sql
DROP TABLE if exists parents ;
CREATE TABLE parents (
parent varchar(100)
, child varchar(100)
) ;
INSERT INTO parents VALUES('Dakota','Madison');
INSERT INTO parents VALUES('Madison','Ava');
INSERT INTO parents VALUES('Madison','Sophia');
INSERT INTO parents VALUES('Sophia','Noah');
INSERT INTO parents VALUES('Noah','Emma');
%%sql
-- anc(X,Y) :- parent(X,Y)
-- anc(X,Z) :- anc(X,Y), parent(Y,Z)
with recursive anc as (
select parent, child, 1 as degree from parents
union
select a.parent, p.child, a.degree+1
from anc a, parents p
where a.child = p.parent
)
select * from anc
order by degree asc;
| parent | child | degree |
|---|---|---|
| Dakota | Madison | 1 |
| Madison | Ava | 1 |
| Madison | Sophia | 1 |
| Sophia | Noah | 1 |
| Noah | Emma | 1 |
| Dakota | Sophia | 2 |
| Dakota | Ava | 2 |
| Madison | Noah | 2 |
| Sophia | Emma | 2 |
| Dakota | Noah | 3 |
| Madison | Emma | 3 |
| Dakota | Emma | 4 |
%%sql
-- Hierarchies
CREATE TABLE cities (
name text primary key
, population int
, altitude int -- in feet
);
CREATE TABLE capitals (
state char(2)
) INHERITS (cities);
insert into cities values('New York City', 8175133, 33);
insert into cities values('Syracuse', 145170, 380);
insert into capitals values('Albany', 97856, 312, 'NY');
%%sql
select * from capitals;
| name | population | altitude | state |
|---|---|---|---|
| Albany | 97856 | 312 | NY |
%%sql
select * from cities ;
| name | population | altitude |
|---|---|---|
| New York City | 8175133 | 33 |
| Syracuse | 145170 | 380 |
| Albany | 97856 | 312 |
%%sql
select * from only(cities);
| name | population | altitude |
|---|---|---|
| New York City | 8175133 | 33 |
| Syracuse | 145170 | 380 |