Oracle Histograms
Version 11.2.0.3
 
General Information
Histogram Types Oracle uses two types of histograms for column statistics: height-balanced histograms and frequency histograms.

The type of histogram is stored in the HISTOGRAM column of the *TAB_COL_STATISTICS views (USER and DBA). This column can have values of HEIGHT BALANCED, FREQUENCY, or NONE.

In a height-balanced histogram, the column values are divided into bands so that each band contains approximately the same number of rows. The useful information that the histogram provides is where in the range of values the endpoints fall.

In a frequency histogram, each value of the column corresponds to a single bucket of the histogram. Each bucket contains the number of occurrences of that single value. Frequency histograms are automatically created instead of height-balanced histograms when the number of distinct values is less than or equal to the number of histogram buckets specified. Frequency histograms can be viewed using the *TAB_HISTOGRAMS views.
Important note with respect to building histograms on columns As far as histograms go, the best strategy is to have none by default, and only create specific histograms when you are sure you need them.
~ Jonathan Lewis, 31-Jan-2010 in the OTN Database-General forum.
Data Dictionary Objects
DBA ALL USER
dba_histograms all_histograms user_histograms
dba_part_histograms all_part_histograms user_part_histograms
dba_subpart_histograms all_sub_part_histograms user_sub_part_histograms
dba_tab_histograms all_tab_histograms user_tab_histograms
 
Frequency Balanced
METHOD_OPT conn oe/oe

desc inventories

-- what values are stored in the warehouse_id column?
SELECT warehouse_id, COUNT(*)
FROM inventories
GROUP BY warehouse_id;

-- what is the current histogram on the warehouse_id column?
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';

-- change the data
INSERT INTO warehouses (warehouse_id) VALUES (10);
INSERT INTO inventories VALUES (3515,10,10);
COMMIT;

SELECT warehouse_id, COUNT(*)
FROM inventories
GROUP BY warehouse_id;

explain plan for
SELECT product_id
FROM inventories
WHERE warehouse_id = 9;

SELECT * FROM TABLE(dbms_xplan.display);

explain plan for
SELECT product_id
FROM inventories
WHERE warehouse_id = 10;

SELECT * FROM TABLE(dbms_xplan.display);
-- now there's an obvious issue ... there is only 1 record not 16

-- we will now fix it by correcting the frequency histogram
desc user_tab_col_statistics

SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';

col low_value format a10
col high_value format a10

SELECT column_name, low_value, high_value, density, last_analyzed
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';

-- create a function to decode the raw values

CREATE OR REPLACE FUNCTION raw2num(rawval IN RAW) RETURN NUMBER AUTHID DEFINER IS
 n NUMBER;
BEGIN
  dbms_stats.convert_raw_value(rawval, n);
  RETURN n;
END raw2num;
/

SELECT column_name, raw2num(low_value) LVAL, raw2num(high_value) HVAL
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';

BEGIN
  dbms_stats.gather_table_stats(USER, 'INVENTORIES', METHOD_OPT => 'FOR COLUMNS SIZE 20 warehouse_id');
END;
/

SELECT column_name, raw2num(low_value) LVAL, raw2num(high_value) HVAL
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID';

SELECT endpoint_number, endpoint_value
FROM user_tab_histograms
WHERE table_name = 'INVENTORIES'
AND column_name = 'WAREHOUSE_ID'
ORDER BY 1;

explain plan for
SELECT product_id
FROM inventories
WHERE warehouse_id = 10;

SELECT * FROM TABLE(dbms_xplan.display);

DELETE FROM inventories WHERE warehouse_id = 10;
DELETE FROM warehouses WHERE warehouse_id = 10;
Manual Generation

This demonstration based on Jonathan Lewis' "Frequency Histograms" article in Issue 39 of OracleScene
CREATE TABLE fbhist_demo (
testcol VARCHAR2(1));

BEGIN
  FOR i IN 1.. 200 LOOP
    INSERT INTO fbhist_demo (testcol) VALUES ('A');
  END LOOP;
  FOR i IN 1.. 1800 LOOP
    INSERT INTO fbhist_demo (testcol) VALUES ('C');
  END LOOP;
  FOR i IN 1.. 8000 LOOP
    INSERT INTO fbhist_demo (testcol) VALUES ('E');
  END LOOP;
  COMMIT;
END;
/

DECLARE
 m_distcnt NUMBER := 3;       -- num_distinct
 m_density NUMBER := 1/1000;  -- density
 m_nullcnt NUMBER := 0;       -- num_nulls
 m_avgclen NUMBER := 1;       -- avg_col_len
 srec      dbms_stats.statrec;
 c_array   dbms_stats.chararray;
BEGIN
  srec.epc := 3;
  c_array := dbms_stats.chararray('A', 'C', 'E');
  srec.bkvals := dbms_stats.numarray(20, 180, 800);

  dbms_stats.prepare_column_values(srec, c_array);

  dbms_stats.set_column_stats(USER, 'FBHIST_DEMO', 'TESTCOL',
  distcnt => m_distcnt,
  density => m_density,
  nullcnt => m_nullcnt,
  srec    => srec,
  avgclen => m_avgclen);
END;
/

desc user_tab_cols

col LV format 99999
col HV format 99999

SELECT table_name, num_distinct, raw2num(low_value) LV, raw2num(high_value) HV,
density, num_nulls, avg_col_len, histogram
FROM user_tab_cols
WHERE table_name = 'FBHIST_DEMO';

EXPLAIN PLAN FOR
SELECT COUNT(*)
FROM fbhist_demo
WHERE testcol = 'E';

SELECT * FROM TABLE(dbms_xplan.display);
 
Height Balanced
Generation With DBMS_STATS dbms_stats.gather_table_stats(<schema_name>, <table_name>,
METHOD_OPT => FOR COLUMN SIZE <integer> <column_name>
SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES';

SELECT quantity_on_hand, COUNT(*)
FROM inventories
GROUP BY quantity_on_hand;

SELECT endpoint_number, endpoint_value
FROM user_tab_histograms
WHERE table_name = 'INVENTORIES'
AND column_name = 'QUANTITY_ON_HAND'
ORDER BY 1;

BEGIN
  dbms_stats.gather_table_stats(USER, TABNAME => 'INVENTORIES', METHOD_OPT => 'FOR COLUMNS SIZE 10 quantity_on_hand');
END;
/

SELECT endpoint_number, endpoint_value
FROM user_tab_histograms
WHERE table_name = 'INVENTORIES'
AND column_name = 'QUANTITY_ON_HAND'
ORDER BY 1;

SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'INVENTORIES';
Legacy generation with ANALYZE ANALYZE TABLE <schema.object_name>
COMPUTE STATISTICS FOR COLUMNS <column_name>
SIZE <number_of_buckets_integer>
ANALYZE TABLE servers
COMPUTE STATISTICS FOR COLUMNS srvr_id
SIZE 3;
Demo SELECT status, COUNT(*)
FROM all_objects
GROUP BY status;

CREATE TABLE demo AS
SELECT * FROM all_objects;

set autotrace traceonly explain

SELECT COUNT(*)
FROM demo
WHERE status = 'INVALID';

set autotrace off

exec dbms_stats.gather_table_stats('UWCLASS', 'DEMO', METHOD_OPT => 'FOR COLUMNS SIZE 3 status');

col table_name format a15
col column_name format a15

SELECT column_name, num_distinct, num_buckets, histogram
FROM user_tab_col_statistics
WHERE table_name = 'DEMO';

SELECT table_name, column_name, endpoint_number, endpoint_value
FROM user_histograms
WHERE table_name = 'DEMO';
Demo conn sh/sh

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA';

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52790;

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52775;

set autotrace traceonly

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA';

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52790;

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52775;

/*
For example, if you wish to add a column group consisting of the cust_state_province and country_id columns to the customers table in SH schema:
*/


DECLARE
 cg_name VARCHAR2(30);
BEGIN
cg_name := dbms_stats.create_extended_stats(USER, 'customers', '(cust_state_province,country_id)');
END;
/

set autotrace traceonly

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA';

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52790;

SELECT COUNT(*)
FROM customers
WHERE cust_state_province = 'CA'
AND country_id=52775;
 
Demo
Histogram Demo drop table servers purge;
drop table serv_inst purge;

@c:\temp\servers.sql

set autotrace traceonly explain

select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'WIN';

select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'MAC';

exec dbms_stats.gather_table_stats('UWCLASS', 'SERV_INST', METHOD_OPT => 'FOR COLUMNS SIZE SKEWONLY');

select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'WIN';

select s.srvr_id, i.type
from servers s, serv_inst i
where s.srvr_id = i.srvr_id
AND type = 'MAC';
Generated using DBMS_STATS

Based closely on a demonstrated developed by Tom Kyte and used at Harmony 2012
CREATE TABLE t AS
SELECT do.*, CASE WHEN rownum < 500 THEN 1 ELSE 99 END AS some_status
FROM dba_objects do;

CREATE INDEX ix_t ON t(some_status);

exec dbms_stats.gather_table_stats(USER, 'T');

SELECT histogram
FROM user_tab_cols
WHERE table_name = 'T'
AND column_name = 'SOME_STATUS';

SELECT some_status, COUNT(*)
FROM t
GROUP BY some_status;

set autotrace traceonly explain

SELECT * FROM t WHERE some_status = 1;
SELECT * FROM t WHERE some_status = 99;

exec dbms_stats.gather_table_stats(USER, 'T');

set autotrace off

SELECT histogram
FROM user_tab_cols
WHERE table_name = 'T'
AND column_name = 'SOME_STATUS';

set autotrace traceonly explain

SELECT * FROM t WHERE some_status = 1;
SELECT * FROM t WHERE some_status = 99;

set autotrace off
col value format 9999999999

SELECT *
FROM (
  SELECT *
  FROM sys.col_usage$
  WHERE obj# = (
    SELECT object_id
    FROM dba_objects
    WHERE object_name = 'T'
    AND owner = 'UWCLASS')
    )
  UNPIVOT (value FOR x IN (
  EQUALITY_PREDS, EQUIJOIN_PREDS, NONEQUIJOIN_PREDS, RANGE_PREDS, LIKE_PREDS, NULL_PREDS));

SELECT * FROM t WHERE some_status > 100;

exec dbms_stats.gather_table_stats(USER, 'T');

SELECT *
FROM (
  SELECT *
  FROM sys.col_usage$
  WHERE obj# = (
    SELECT object_id
    FROM dba_objects
    WHERE object_name = 'T'
    AND owner = 'UWCLASS')
    )
  UNPIVOT (value FOR x IN (
  EQUALITY_PREDS, EQUIJOIN_PREDS, NONEQUIJOIN_PREDS, RANGE_PREDS, LIKE_PREDS, NULL_PREDS));
This demo is from Oracle 10.2.0.5 but serves as an important cautionary example for anyone using histograms.

The demo code is mine and the explanation from Jonathan Lewis.
Both of these explain plans were created using the same session logged into the same database within a matter of a few minutes. Note in the first explain plan "BITMAP CONVERSION" ... sometimes I get the first result, other times the second. All indexes are verified to be B*Tree (database version 10.2.0.5 on RedHat Linux V5U6). The SQL statement that generated the plans, below, was not altered in any way.


SQL> EXPLAIN PLAN FOR
   2 SELECT member_id, SUM(cnt)
   3 FROM (
   4 SELECT member_id, 1 CNT
   5 FROM webstore.orders
   6 WHERE site_id IN (23,24,25,29,30,31,32,33)
   7 AND status_id = 7
   8 AND date_received BETWEEN add_months(SYSDATE, -12) AND SYSDATE
   9 AND rownum = 1
  10 UNION ALL
  11 SELECT member_id, 1
  12 FROM webstore.orders
  13 WHERE site_id IN (23,24,25,29,30,31,32,33)
  14 AND status_id = 7
  15 AND date_received BETWEEN add_months(SYSDATE, -24) AND add_months(SYSDATE, -12)
  16 AND rownum = 1
  17 UNION ALL
  18 SELECT member_id, 1
  19 FROM webstore.orders
  20 WHERE site_id IN (23,24,25,29,30,31,32,33)
  21 AND status_id = 7
  22 AND date_received BETWEEN add_months(SYSDATE, -36) and add_months(SYSDATE, -24)
  23 AND rownum = 1)
  24 WHERE rownum < 4
  25 GROUP BY member_id
  26 HAVING SUM(cnt) > 1;

Explained.

SQL> select * from xplan;

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------
|  Id | Operation                    | Name                | Rows| Bytes| Cost (%CPU)|
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |                     |   3 |  111 |   32    (7)|
|*  1 | FILTER                       |                     |     |      |            |
|   2 | HASH GROUP BY                |                     |   3 |  111 |   32    (7)|
|*  3 | COUNT STOPKEY                |                     |     |      |            |
|   4 | VIEW                         |                     |   3 |  111 |   31    (4)|
|   5 | UNION-ALL                    |                     |     |      |            |
|*  6 | COUNT STOPKEY                |                     |     |      |            |
|*  7 | TABLE ACCESS BY INDEX ROWID  | ORDERS              |   4 |  192 |    9    (0)|
|*  8 | INDEX RANGE SCAN             | XIE2ORDERS          |     |      |    3    (0)|
|*  9 | COUNT STOPKEY                |                     |     |      |            |
|* 10 | TABLE ACCESS BY INDEX ROWID  | ORDERS              |   5 |  240 |   12    (9)|
|  11 | BITMAP CONVERSION TO ROWIDS  |                     |     |      |            |
|  12 | BITMAP AND                   |                     |     |      |            |
|  13 | BITMAP CONVERSION FROM ROWIDS|                     |     |      |            |
|* 14 | INDEX RANGE SCAN             | XIE2ORDERS          |     |      |    3    (0)|
|  15 | BITMAP CONVERSION FROM ROWIDS|                     |     |      |            |
|  16 | SORT ORDER BY                |                     |     |      |            |
|* 17 | INDEX RANGE SCAN             | ORDERS_DATE_RECEIVED|     |      |    3    (0)|
|* 18 | COUNT STOPKEY                |                     |     |      |            |
|* 19 | TABLE ACCESS BY INDEX ROWID  | ORDERS              |   4 |  192 |   10    (0)|
|* 20 | INDEX RANGE SCAN             | XIE2ORDERS          |     |      |    3    (0)|
--------------------------------------------------------------------------------------

PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------
|  Id | Operation                    | Name                | Rows| Bytes| Cost (%CPU)|
--------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |                     |   3 |  111 |  34     (3)|
|*  1 | FILTER                       |                     |     |      |            |
|   2 | HASH GROUP BY                |                     |   3 |  111 |  34     (3)|
|*  3 | COUNT STOPKEY                |                     |     |      |            |
|   4 | VIEW                         |                     |   3 |  111 |  33     (0)|
|   5 | UNION-ALL                    |                     |     |      |            |
|*  6 | COUNT STOPKEY                |                     |     |      |            |
|*  7 | TABLE ACCESS BY INDEX ROWID  | ORDERS              |   5 |  240 |  11     (0)|
|*  8 | INDEX RANGE SCAN             | XIE2ORDERS          |     |      |   3     (0)|
|*  9 | COUNT STOPKEY                |                     |     |      |            |
|* 10 | TABLE ACCESS BY INDEX ROWID  | ORDERS              |   4 |  192 |  10     (0)|
|* 11 | INDEX RANGE SCAN             | XIE2ORDERS          |     |      |   3     (0)|
|* 12 | COUNT STOPKEY                |                     |     |      |            |
|* 13 | TABLE ACCESS BY INDEX ROWID  | ORDERS              |   5 |  240 |  12     (0)|
|* 14 | INDEX RANGE SCAN             | XIE2ORDERS          |     |      |   3     (0)|
--------------------------------------------------------------------------------------

The anomaly is, in principle, perfectly reasonable - and in your case could be explained simply by a histogram on the date column.

add_months(sysdate,N) is a known constant at optimisation time so, with a re-optimisation, a small change in sysdate, could produce different cardinalities and therefore different plans.
    ~Jonathan Lewis 19 November, 2011
 
 
Morgan's Library Page Footer
This site is maintained by Dan Morgan. Last Updated: This site is protected by copyright and trademark laws under U.S. and International law. © 1998-2014 Daniel A. Morgan All Rights Reserved