| General |
Histogram Types |
Oracle uses two types of histograms for column statistics: height-balanced histograms and frequency histograms.
The type of histogram is stored in the HISTOGRAM column of the *TAB_COL_STATISTICS views (USER and DBA). This column can have values of HEIGHT BALANCED, FREQUENCY, or NONE.
In a height-balanced histogram, the column values are divided into bands so that each band contains
approximately the same number of rows. The useful information that the histogram provides is where in
the range of values the endpoints fall.
In a frequency histogram, each value of the column corresponds to a single bucket of the histogram. Each bucket contains the number of occurrences of that single value. Frequency histograms are automatically created instead of height-balanced histograms when the number of distinct values is less than or equal to the number of histogram buckets specified. Frequency histograms can be viewed using the *TAB_HISTOGRAMS
views. |
| Important note with
respect to building histograms on columns |
As far as histograms go, the
best strategy is to have none by default, and only create specific
histograms when you are sure you need them.
~ Jonathan Lewis, 31-Jan-2010 in the OTN Database-General forum. |
| Data Dictionary Objects |
|
DBA |
ALL |
USER |
| dba_histograms |
all_histograms |
user_histograms |
| dba_part_histograms |
all_part_histograms |
user_part_histograms |
| dba_subpart_histograms |
all_sub_part_histograms |
user_sub_part_histograms |
| dba_tab_histograms |
all_tab_histograms |
user_tab_histograms |
|
| |
| Frequency |
METHOD_OPT |
conn oe/oe
desc inventories
-- what values are stored in the warehouse_id column?
SELECT warehouse_id, COUNT(*)
FROM inventories
GROUP BY warehouse_id;
-- what is the current histogram on the warehouse_id
column?
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';
-- change the data
INSERT INTO warehouses (warehouse_id) VALUES (10);
INSERT INTO inventories VALUES (3515,10,10);
COMMIT;
SELECT warehouse_id, COUNT(*)
FROM inventories
GROUP BY warehouse_id;
explain plan for
SELECT product_id
FROM inventories
WHERE warehouse_id = 9;
SELECT * FROM TABLE(dbms_xplan.display);
explain plan for
SELECT product_id
FROM inventories
WHERE warehouse_id = 10;
SELECT * FROM TABLE(dbms_xplan.display);
-- now there's an obvious issue ... there is only 1
record not 16
-- we will now fix it by correcting the frequency
histogram
desc user_tab_col_statistics
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';
col low_value format a10
col high_value format a10
SELECT column_name, low_value, high_value, density, last_analyzed
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';
-- create a function to decode the raw values
CREATE OR REPLACE FUNCTION raw2num(rawval IN raw) RETURN NUMBER AUTHID DEFINER IS
n NUMBER;
BEGIN
dbms_stats.convert_raw_value(rawval, n);
RETURN n;
END raw2num;
/ |
SELECT column_name, raw2num(low_value) LVAL, raw2num(high_value) HVAL
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';
BEGIN
dbms_stats.gather_table_stats(USER,
'INVENTORIES', METHOD_OPT => 'FOR COLUMNS SIZE 20
warehouse_id');
END;
/
SELECT column_name, raw2num(low_value) LVAL, raw2num(high_value) HVAL
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';
SELECT endpoint_number, endpoint_value
FROM user_tab_histograms
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID'
ORDER BY 1;
explain plan for
SELECT product_id
FROM inventories
WHERE warehouse_id = 10;
SELECT * FROM TABLE(dbms_xplan.display);
DELETE FROM inventories WHERE warehouse_id = 10;
DELETE FROM warehouses WHERE warehouse_id = 10; |
Manual Generation
This demonstration based on Jonathan Lewis' "Frequency Histograms" article
in Issue 39 of OracleScene |
CREATE TABLE
fbhist_demo (
testcol VARCHAR2(1));
BEGIN
FOR i IN 1.. 200 LOOP
INSERT INTO fbhist_demo (testcol) VALUES ('A');
END LOOP;
FOR i IN 1.. 1800 LOOP
INSERT INTO fbhist_demo (testcol) VALUES ('C');
END LOOP;
FOR i IN 1.. 8000 LOOP
INSERT INTO fbhist_demo (testcol) VALUES ('E');
END LOOP;
COMMIT;
END;
/
DECLARE
m_distcnt NUMBER := 3; -- num_distinct
m_density NUMBER := 1/1000; -- density
m_nullcnt NUMBER := 0; -- num_nulls
m_avgclen NUMBER := 1; -- avg_col_len
srec dbms_stats.statrec;
c_array dbms_stats.chararray;
BEGIN
srec.epc := 3;
c_array := dbms_stats.chararray('A', 'C', 'E');
srec.bkvals := dbms_stats.numarray(20, 180, 800);
dbms_stats.prepare_column_values(srec, c_array);
dbms_stats.set_column_stats(USER, 'FBHIST_DEMO', 'TESTCOL',
distcnt => m_distcnt,
density => m_density,
nullcnt => m_nullcnt,
srec => srec,
avgclen => m_avgclen);
END;
/
desc user_tab_cols
col LV format 99999
col HV format 99999
SELECT table_name, num_distinct, raw2num(low_value) LV, raw2num(high_value)
HV,
density, num_nulls, avg_col_len, histogram
FROM user_tab_cols
WHERE table_name = 'FBHIST_DEMO';
EXPLAIN PLAN FOR
SELECT COUNT(*)
FROM fbhist_demo
WHERE testcol = 'E';
SELECT * FROM TABLE(dbms_xplan.display); |
| |
| Height Balanced |
| |
Generation With DBMS_STATS |
dbms_stats.gather_table_stats(<schema_name>,
<table_name>,
METHOD_OPT => FOR COLUMN SIZE <integer> <column_name> |
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES';
SELECT quantity_on_hand, COUNT(*)
FROM inventories
GROUP BY quantity_on_hand;
SELECT endpoint_number, endpoint_value
FROM user_tab_histograms
WHERE table_name = 'INVENTORIES'
AND column_name = 'QUANTITY_ON_HAND'
ORDER BY 1;
BEGIN
dbms_stats.gather_table_stats(USER, TABNAME
=> 'INVENTORIES', METHOD_OPT => 'FOR COLUMNS SIZE 10 quantity_on_hand');
END;
/
SELECT endpoint_number, endpoint_value
FROM user_tab_histograms
WHERE table_name = 'INVENTORIES'
AND column_name = 'QUANTITY_ON_HAND'
ORDER BY 1;
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'; |
| Legacy generation with ANALYZE |
ANALYZE TABLE <schema.object_name>
COMPUTE STATISTICS FOR COLUMNS <column_name>
SIZE <number_of_buckets_integer> |
ANALYZE TABLE servers
COMPUTE STATISTICS FOR COLUMNS srvr_id
SIZE 3; |
Demo |
SELECT status, COUNT(*)
FROM all_objects
GROUP BY status;
CREATE TABLE demo AS
SELECT * FROM all_objects;
set autotrace traceonly explain
SELECT COUNT(*)
FROM demo
WHERE status = 'INVALID';
set autotrace off
exec dbms_stats.gather_table_stats('UWCLASS', 'DEMO',
METHOD_OPT => 'FOR COLUMNS SIZE 3 status');
col table_name format a15
col column_name format a15
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'DEMO';
SELECT table_name, column_name, endpoint_number, endpoint_value
FROM user_histograms
WHERE table_name = 'DEMO';
|
Demo |
conn sh/sh
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA';
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52790;
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52775;
set autotrace traceonly
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA';
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52790;
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52775;
/*
For example, if you wish to add a column group consisting of the cust_state_province and country_id columns to the customers table in SH schema:
*/
DECLARE
cg_name VARCHAR2(30);
BEGIN
cg_name := dbms_stats.create_extended_stats(USER,
'customers', '(cust_state_province,country_id)');
END;
/
set autotrace traceonly
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA';
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52790;
SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52775; |
| |
| Demo |
Histogram Demo |
drop table servers purge;
drop table serv_inst purge;
@c:\temp\servers.sql
set autotrace traceonly explain
select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'WIN';
select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'MAC';
exec dbms_stats.gather_table_stats('UWCLASS', 'SERV_INST', METHOD_OPT => 'FOR COLUMNS SIZE SKEWONLY');
select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'WIN';
select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'MAC'; |
|