AVG |
Returns a running average |
AVG( DISTINCT | ALL <expression>) OVER (analytic clause) |
CREATE TABLE vote_count (
submit_date DATE NOT NULL,
num_votes NUMBER NOT NULL);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-4, 100);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-3, 150);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-2, 75);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-3, 25);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-1, 50);
COMMIT;
SELECT * FROM vote_count;
SELECT submit_date, num_votes, TRUNC(AVG(num_votes)
OVER(ORDER BY submit_date ROWS UNBOUNDED PRECEDING)) AVG_VOTE_PER_DAY
FROM vote_count
ORDER BY submit_date;
SELECT submit_date, num_votes, TRUNC(AVG(num_votes)
OVER(PARTITION BY submit_date
ORDER BY submit_date ROWS UNBOUNDED PRECEDING)) AVG_VOTE_PER_DAY
FROM vote_count
ORDER BY submit_date; |
|
CORR |
Returns the coefficient of correlation of a set of number pairs |
CORR(<expression1>, <expression2>) OVER (<analytic clause>) |
conn sh/sh
SELECT t.calendar_month_number,
CORR (SUM(s.amount_sold), SUM(s.quantity_sold))
OVER (ORDER BY t.calendar_month_number) AS CUM_CORR
FROM sales s, times t
WHERE s.time_id = t.time_id AND calendar_year = 1998
GROUP BY t.calendar_month_number; |
|
COUNT |
Returns a running count of all records or by partition |
COUNT(<*, [ DISTINCT | ALL] <expression>>) OVER (<analytic clause>) |
SELECT submit_date, num_votes, TRUNC(COUNT(num_votes)
OVER(ORDER BY submit_date ROWS UNBOUNDED PRECEDING)) AS DAY_COUNT
FROM vote_count
ORDER BY submit_date;
SELECT submit_date, COUNT(*)
OVER(PARTITION BY submit_date ORDER BY submit_date
ROWS UNBOUNDED PRECEDING) NUM_RECS
FROM vote_count; |
Return a frequency
distribution |
CREATE TABLE myprods (
prod1 NUMBER(3),
prod2 NUMBER(3),
prod3 NUMBER(3));
INSERT INTO myprods VALUES (34,23,45);
INSERT INTO myprods VALUES (34,22,34);
INSERT INTO myprods VALUES (54,44,45);
INSERT INTO myprods VALUES (23,22,45);
INSERT INTO myprods VALUES (45,22,34);
SELECT prod1, COUNT(prod1) OVER (PARTITION BY prod1) freq1,
prod2, COUNT(prod2) OVER (PARTITION BY prod2) freq2,
prod3, COUNT(prod3) OVER (PARTITION BY prod3) freq3
FROM myprods; |
|
COVAR_POP |
Returns the population covariance of a set of number pairs |
COVAR_POP(<expression1>, <expression2>) OVER (<analytic clause>) |
conn hr/hr
SELECT job_id,
COVAR_POP(SYSDATE-hire_date, salary) AS covar_pop,
COVAR_SAMP(SYSDATE-hire_date, salary) AS covar_samp
FROM employees
WHERE department_id in (50, 80)
GROUP BY job_id; |
|
COVAR_SAMP |
Returns the sample covariance of a set of number pairs |
COVAR_SAMP(<expression1>, <expression2>) OVER (<analytic clause>) |
See COVAR_POP Demo above |
|
CUME_DIST |
Returns the cumulative distribution of a value in a group of values |
CUME_DIST(<value>) OVER (<partition_clause> <order by clause>) |
conn hr/hr
SELECT job_id, last_name, salary, CUME_DIST()
OVER (PARTITION BY job_id
ORDER BY salary) AS cume_dist
FROM employees
WHERE job_id LIKE 'PU%'; |
|
DENSE_RANK |
Ranks items in a
group leaving no gaps in ranking sequence when there are ties |
DENSE_RANK() OVER (<query_partition_clause> <order_by_clause>)
|
conn hr/hr
SELECT d.department_name, e.last_name, e.salary, DENSE_RANK()
OVER (PARTITION BY e.department_id
ORDER BY e.salary) AS DENSE_RANK
FROM employees e, departments d
WHERE e.department_id = d.department_id
AND d.department_id IN (30, 60); |
|
FIRST |
Returns the row ranked first using DENSE_RANK |
SELECT <aggregate_function(column_name)> KEEP
(DENSE_RANK FIRST ORDER BY <column_name> [<ASC|DESC> NULLS <FIRST|LAST>)
OVER (PARTITION BY <column_name>)
FROM <table_name>
GROUP BY <column_name>; |
conn hr/hr
SELECT last_name, department_id, salary,
MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct)
OVER (PARTITION BY department_id) "Worst",
MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct)
OVER (PARTITION BY department_id) "Best"
FROM employees
WHERE department_id IN (30, 60)
ORDER BY department_id, salary; |
|
FIRST_VALUE |
Returns the first value in an ordered set of values. If the
first value in the set is null, then the function returns NULL unless you specify IGNORE NULLS |
FIRST_VALUE(<expression> [IGNORE NULLS])
OVER (<analytic clause>) |
conn hr/hr
SELECT last_name, salary, hire_date, FIRST_VALUE(hire_date)
OVER (ORDER BY salary ROWS BETWEEN UNBOUNDED PRECEDING AND
UNBOUNDED FOLLOWING) AS lv
FROM (SELECT * FROM employees WHERE department_id = 90
ORDER BY hire_date); |
|
IGNORE NULLS |
The following
is a minor modification of a demo published in the November/December
2006 issue of Oracle Magazine by Tom Kyte |
(<column_name> IGNORE
NULLS) |
CREATE TABLE t1 (
row_num NUMBER(3),
col1 VARCHAR2(15),
col2 VARCHAR2(15));
INSERT INTO t1 VALUES (6, NULL, NULL);
INSERT INTO t1 VALUES (1, 'Category 1', 'Mango');
INSERT INTO t1 VALUES (2, NULL, NULL);
INSERT INTO t1 VALUES (3, NULL, NULL);
INSERT INTO t1 VALUES (4, NULL, 'Banana');
INSERT INTO t1 VALUES (5, NULL, NULL);
INSERT INTO t1 VALUES (6, NULL, NULL);
INSERT INTO t1 VALUES (7, 'Category 2', 'Vanilla');
INSERT INTO t1 VALUES (8, NULL, NULL);
INSERT INTO t1 VALUES (9, 'Category 3', 'Strawberry');
COMMIT;
SELECT * FROM t1;
SELECT row_num,
LAST_VALUE(col1 IGNORE NULLS) OVER (ORDER BY row_num) col1,
LAST_VALUE(col2 IGNORE NULLS) OVER (ORDER BY row_num) col2
FROM t1
ORDER BY row_num; |
|
LAG |
LAG provides access to more than one row of a table at the same
time without a self-join. Given a series of rows returned from a query and a position of the cursor, LAG provides access
to a row at a given physical offset prior to that position. |
LAG(<value expression>, <offset>, <default>)
OVER ([<query partition clause>] <order_by_clause>) |
conn hr/hr
SELECT last_name, hire_date, salary,
LAG(salary, 1, 0) OVER
(ORDER BY hire_date) AS PREV_SAL
FROM employees
WHERE job_id = 'PU_CLERK'; |
|
LAST |
Returns the row ranked last using DENSE_RANK |
<aggregate function> KEEP (DENSE_RANK LAST ORDER BY
(<expression> <ASC | DESC> NULLS <FIRST | LAST>) |
See FIRST Demo above |
|
LAST_VALUE |
Returns the last value in an ordered set of values.
If the last value in the set is null, then the function returns NULL unless you specify IGNORE NULLS.
This setting is useful for data densification. If you specify IGNORE NULLS, then LAST_VALUE returns the first
non-null value in the set, or NULL if all values are null. |
LAST_VALUE (<expression> IGNORE NULLS) OVER (<analytic clause>) |
conn hr/hr
SELECT last_name, salary, hire_date, LAST_VALUE(hire_date)
OVER (ORDER BY salary ROWS BETWEEN UNBOUNDED PRECEDING AND
UNBOUNDED FOLLOWING) AS lv
FROM (SELECT * FROM employees WHERE department_id = 90
ORDER BY hire_date); |
|
LEAD |
LEAD provides access to a row at a given physical offset beyond that position |
LEAD(<expression, offset, default>)
[(<query_partition_clause>)]
OVER (<order_by_clause>) |
SELECT submit_date, num_votes,
LEAD(num_votes, 1, 0) OVER
(ORDER BY submit_date) AS NEXT_VAL
FROM vote_count; |
|
MAX |
Returns the maximum value by partition |
MAX (< DISTINCT | ALL> expression) OVER (<analytic clause>) |
conn hr/hr
SELECT manager_id, last_name, salary
FROM (
SELECT manager_id, last_name, salary,
MAX(salary) OVER
(PARTITION BY manager_id) AS rmax_sal
FROM employees)
WHERE salary = rmax_sal; |
|
MIN |
Returns the minimum value by partition |
MIN (< DISTINCT | ALL> expression) OVER (<analytic clause>) |
conn hr/hr
SELECT manager_id, last_name, salary
FROM (
SELECT manager_id, last_name, salary,
MAX(salary) OVER
(PARTITION BY manager_id) AS rmax_sal
FROM employees)
WHERE salary = rmax_sal; |
|
NTILE |
Divides an ordered data set into a number of buckets indicated
by expr and assigns the appropriate bucket number to each row. The buckets are numbered 1 through expr. The expr value
must resolve to a positive constant for each partition. |
NTILE (<expression>) OVER ([query_partition_clause] <order by clause>) |
conn hr/hr
SELECT last_name, salary,
NTILE(4) OVER (ORDER BY salary DESC) AS quartile
FROM employees
WHERE department_id = 100; |
|
OVER PARTITION BY
|
This demo returns employees that
are making above average salary in their respective department |
NTILE (<expression>) OVER ([query_partition_clause] <order by clause>) |
conn hr/hr
col ename format a30
col department_name format a20
SELECT * FROM (
SELECT e.ffirst_name || ' ' || e.last_name ENAME,
d.department_name,
e.salary,
TRUNC(e.salary - avg(e.salary) OVER (PARTITION
BY
e.department_id)) sal_dif
FROM employees e, departments d
WHERE e.department_id=d.department_id)
WHERE sal_dif > 0
ORDER BY 2,4 DESC; |
|
PERCENT_RANK |
For a row r, PERCENT_RANK calculates the rank of r minus 1, divided
by 1 less than the number of rows being evaluated (the entire query result set or a partition). |
PERCENT_RANK(<value>) OVER (<partition_clause> <order_by_clause>) |
conn hr/hr
SELECT department_id, last_name, salary, PERCENT_RANK()
OVER (PARTITION BY department_id ORDER BY salary DESC) AS pr
FROM employees
ORDER BY pr, salary; |
|
PERCENTILE_CONT |
Inverse distribution function that assumes a continuous distribution
model. It takes a percentile value and a sort specification, and returns an interpolated value that would fall into that
percentile value with respect to the sort specification. Nulls are ignored in the calculation. |
PERCENTILE_CONT(<value>) WITHIN GROUP (ORDER BY <expression>
[ASC | DESC]) OVER (<partition_clause>) |
conn hr/hr
SELECT last_name, salary, department_id,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary DESC)
OVER (PARTITION BY department_id) PCT_CONT, PERCENT_RANK()
OVER (PARTITION BY department_id ORDER BY salary DESC) PCT_RANK
FROM employees
WHERE department_id IN (30, 60); |
|
PERCENTILE_DISC |
An inverse distribution function that assumes a discrete distribution
model. It takes a percentile value and a sort specification and returns an element from the set. Nulls are ignored in the
calculation. |
PERCENTILE_DISC(<expression>) WITHIN GROUP (ORDER BY <order_by_clause>) |
conn hr/hr
col cume_dist format 9.999
SELECT last_name, salary, department_id,
PERCENTILE_DISC(0.5) WITHIN GROUP
(ORDER BY salary DESC)
OVER (PARTITION BY department_id) PCT_DISC,
CUME_DIST() OVER (PARTITION BY department_id
ORDER BY salary DESC) CUME_DIST
FROM employees
WHERE department_id IN (30, 60); |
|
RANK |
Calculates the rank of a value in a group of values |
RANK(<value>) OVER (<partition_clause> ORDER BY <order_by_clause>) |
conn hr/hr
SELECT department_id, last_name, salary, commission_pct,
RANK() OVER
(PARTITION BY department_id
ORDER BY salary DESC, commission_pct) "Rank"
FROM employees
WHERE department_id = 80;
/* The following query finds the 5 top-selling products for each product subcategory where that product
contributes more than 20% of the sales within its product category. */
conn sh/sh
col categ format a15
col prod_subcategory format a20
SELECT SUBSTR(prod_category,1,8) AS CATEG, prod_subcategory, prod_id, sales
FROM (
SELECT p.prod_category, p.prod_subcategory, p.prod_id,
SUM(amount_sold) as SALES, SUM(SUM(amount_sold))
OVER (PARTITION BY p.prod_category) AS CAT_SALES,
SUM(SUM(amount_sold))
OVER (PARTITION BY p.prod_subcategory) AS SUBCAT_SALES,
RANK() OVER
(PARTITION BY p.prod_subcategory
ORDER BY SUM(amount_sold) ) AS RANK_IN_LINE
FROM sales s, customers c, countries co, products p
WHERE s.cust_id = c.cust_id
AND c.country_id = co.country_id
AND s.prod_id = p.prod_id
AND s.time_id = TO_DATE('11-OCT-2000')
GROUP BY p.prod_category, p.prod_subcategory, p.prod_id
ORDER BY prod_category, prod_subcategory)
WHERE SUBCAT_SALES > 0.2 * CAT_SALES
AND RANK_IN_LINE<=5; |
|
RATIO_TO_REPORT |
Computes the ratio of a value to the sum of a set of values.
If expr evaluates to null, then the ratio-to-report value also evaluates to null. |
RATIO_TO_REPORT(<value>)
OVER (<partition_clause>) |
conn hr/hr
SELECT last_name, salary, RATIO_TO_REPORT(salary) OVER () AS RR
FROM employees
WHERE job_id = 'PU_CLERK'; |
|
REGR_ (Linear Regression) Functions |
Generic Syntax |
FUNCTION_NAME (<expression1>,<expression2>) OVER (<analytic_clause>) |
REGR_AVGX |
--
see REGR_AVGX Demo
conn hr/hr
SELECT job_id, employee_id ID, salary,
REGR_SLOPE(SYSDATE-hire_date, salary)
OVER (PARTITION BY job_id) slope,
REGR_INTERCEPT(SYSDATE-hire_date, salary)
OVER (PARTITION BY job_id) intcpt,
REGR_R2(SYSDATE-hire_date, salary)
OVER (PARTITION BY job_id) rsqr,
REGR_COUNT(SYSDATE-hire_date, salary)
OVER (PARTITION BY job_id) count,
REGR_AVGX(SYSDATE-hire_date, salary)
OVER (PARTITION BY job_id) avgx,
REGR_AVGY(SYSDATE-hire_date, salary)
OVER (PARTITION BY job_id) avgy
FROM employees
WHERE department_id in (50, 80)
ORDER BY job_id, employee_id; |
REGR_AVGY |
--
see REGR_AVGX Demo
conn hr/hr
SELECT job_id,
REGR_AVGY(SYSDATE - hire_date, salary) avgy,
REGR_AVGX(SYSDATE - hire_date, salary) avgx
FROM employees
WHERE department_id in (30, 50)
GROUP BY job_id; |
REGR_COUNT |
--
see REGR_AVGX Demo
conn hr/hr
SELECT job_id,
REGR_COUNT(SYSDATE-hire_date, salary) count
FROM employees
WHERE department_id in (30, 50)
GROUP BY job_id; |
REGR_INTERCEPT |
--
see REGR_AVGX Demo
conn hr/hr
SELECT job_id,
REGR_SLOPE(SYSDATE - hire_date, salary) slope,
REGR_INTERCEPT(SYSDATE - hire_date, salary) intercept
FROM employees
WHERE department_id in (50,80)
GROUP BY job_id
ORDER BY job_id; |
REGR_R2 |
--
see REGR_AVGX Demo
conn hr/hr
SELECT job_id, REGR_R2(SYSDATE-hire_date, salary) Regr_R2
FROM employees
WHERE department_id IN (50, 80)
GROUP BY job_id; |
REGR_SLOPE |
See REGR_AVGX Demo
See REGR_INTERCEPT Demo |
REGR_SXX |
conn hr/hr
SELECT job_id,
REGR_SXY((SYSDATE - hire_date, salary) regr_sxy,
REGR_SXX((SYSDATE - hire_date, salary) regr_sxx,
REGR_SYY(SYSDATE - hire_date, salary) regr_syy
FROM employees
WHERE department_id in (50, 80)
GROUP BY job_id
ORDER BY job_id; |
REGR_SXY |
See REGR_SXX Demo |
REGR_SYY |
See REGR_SXX Demo |
|
ROW_NUMBER |
Assigns a unique number to each row to which it is applied (either each row in the partition or
each row returned by the query), in the ordered sequence of rows specified in the order by clause, beginning with 1. |
ROW_NUMBER(<value>) OVER
(<partition_clause> ORDER BY <order_by_clause>) |
CREATE TABLE test (
id NUMBER(1),
degrees NUMBER(3));
INSERT INTO test VALUES (0,235);
INSERT INTO test VALUES (0,276);
INSERT INTO test VALUES (1,211);
INSERT INTO test VALUES (1,250);
INSERT INTO test VALUES (1,255);
INSERT INTO test VALUES (2,55);
INSERT INTO test VALUES (2,277);
INSERT INTO test VALUES (2,69);
INSERT INTO test VALUES (3,25);
INSERT INTO test VALUES (3,166);
INSERT INTO test VALUES (3,262);
INSERT INTO test VALUES (4,47);
INSERT INTO test VALUES (4,238);
INSERT INTO test VALUES (4,40);
COMMIT;
SELECT * FROM test;
-- choose the starting cell
SELECT id, degrees s
FROM (
SELECT id, degrees, (360 - degrees) d360,
ROW_NUMBER() OVER(PARTITION BY id
ORDER BY CASE
WHEN( (degrees < 360 - degrees) THEN( degrees
ELSE 360 - degrees
END) rn
FROM test) t
WHERE rn = 1;
-- order the rest clockwise
SELECT *
FROM (
SELECT t.id, t.degrees,
ROW_NUMBER() OVER(PARTITION BY t.id
ORDER BY CASE
WHEN( (t.degrees < starting_cell.degrees) THEN( t.degrees + 360
ELSE t.degrees
END) rn
FROM test t
JOIN (
SELECT id, degrees, (360 - degrees) d360,
ROW_NUMBER() OVER(PARTITION BY id
ORDER BY CASE
WHEN( (degrees < 360 - degrees) THEN( degrees
ELSE 360 - degrees
END) rn
FROM test) starting_cell
ON t.id = starting_cell.id
WHERE starting_cell.rn=1)t
ORDER BY id, rn; |
|
STDDEV |
Returns the sample standard deviation of an expression |
STDDEV([ DISTINCT | ALL] <expression>) OVER (<analytic_clause>) |
conn hr/hr
col stddev format 99999.999
SELECT last_name, salary,
STDDEV(salary) OVER
(ORDER BY hire_date) "StdDev"
FROM employees
WHERE department_id = 30;
|
|
STDDEV_POP |
Computes the population standard deviation
and returns the square root of the population variance |
STDDEV_POP(<expression>) OVER (<analytic_clause>) |
conn hr/hr
SELECT department_id, last_name, salary,
STDDEV_POP(salary) OVER
(PARTITION BY department_id) AS pop_std
FROM employees; |
|
STDDEV_SAMP |
Computes the cumulative sample standard deviation and returns the square root of the sample variance.
|
STDDEV_SAMP(<expression>) OVER (<analytic_clause>) |
conn hr/hr
SELECT department_id, last_name, hire_date, salary,
STDDEV_SAMP(salary) OVER
(PARTITION BY department_id
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cum_sdev
FROM employees; |
|
SUM |
Computes the cumulative sample running sum
Corrected thanks to a note from Mette Stephansen
in Denmark. |
|
CREATE TABLE vote_count (
submit_date DATE NOT NULL,
num_votes NUMBER NOT NULL);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-4, 100);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-3, 150);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-2, 75);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-3, 25);
INSERT INTO vote_count VALUES (TRUNC(SYSDATE)-1, 50);
COMMIT;
SELECT * FROM vote_count;
SELECT submit_date, num_votes, SUM(num_votes)
OVER(ORDER BY submit_date
ROWS UNBOUNDED PRECEDING) TOT_VOTE
FROM vote_count
ORDER BY submit_date;
|
|
VAR_POP |
Returns the population variance of a set of numbers |
VAR_POP(<value>) OVER (<analytic_clause>) |
conn sh/sh
SELECT t.calendar_month_desc,
VAR_POP(SUM(s.amount_sold))
OVER (ORDER BY t.calendar_month_desc) "Var_Pop",
VAR_SAMP(SUM(s.amount_sold))
OVER (ORDER BY t.calendar_month_desc) "Var_Samp"
FROM sales s, times t
WHERE s.time_id = t.time_id AND t.calendar_year = 2001
GROUP BY t.calendar_month_desc; |
|
VAR_SAMP |
Returns the sample variance of a set of numbers |
VAR_SAMP(<value>) OVER (<analytic_clause>) |
See VAR_POP Demo above |
|
VARIANCE |
Returns the variance of an expression |
VARIANCE([ DISTINCT | ALL] <value>)
OVER (<analytic_clause>) |
conn hr/hr
SELECT last_name, salary,
VARIANCE(salary) OVER (ORDER BY hire_date) AS VARIANCE
FROM employees
WHERE department_id = 30; |
|
Additional
Demos |
Another example:
This one written by Maxim Demenko |
CREATE TABLE test
(id, quality, weight) AS
SELECT 1,'A',10 FROM DUAL UNION ALL
SELECT 2,'A',11 FROM DUAL UNION ALL
SELECT 3,'A',12 FROM DUAL UNION ALL
SELECT 4,'B',11 FROM DUAL UNION ALL
SELECT 5,'B',19 FROM DUAL UNION ALL
SELECT 6,'A',9 FROM DUAL UNION ALL
SELECT 7,'A',14 FROM DUAL UNION ALL
SELECT 8,'C',4 FROM DUAL UNION ALL
SELECT 9,'C',7 FROM DUAL;
SELECT *
FROM test;
SELECT MAX(id) ID, MAX(quality) QUALITY, SUM(weight) WEIGHT
FROM (
SELECT id, quality, weight, SUM(new_seq)
OVER (ORDER BY id) new_grp
FROM (
SELECT id, quality, weight, DECODE((LAG(quality)
OVER (ORDER BY id),
quality, 0, id) new_seq
FROM test))
GROUP BY new_grp
ORDER BY 1; |