Files
oracle/tpt/tabhisthybrid.sql
2026-03-12 21:23:47 +01:00

135 lines
4.4 KiB
SQL

-- script: tabhisthybrid.sql
-- author: Tanel Poder [tanelpoder.com]
-- created: Oct 2025
-- usage: @tabhisthybrid [<owner>.]<table_name> <column_name>
-- example: @tabhisthybrid soe.customers account_mgr_id
--
-- notes:
-- I reused the NewDensity logic already written by Mohamed Houri (and Alberto Dell'Era, Jonathan Lewis)
-- https://www.red-gate.com/simple-talk/databases/oracle-databases/12c-hybrid-histogram/
--
-- This script works with HYBRID histograms on NUMBER columns only for now.
-- I'm hoping to unify this script and my old tabhist.sql to show estimated cardinalities
-- (for equality filters) for all histogram types and data types someday.
COL phys_bucket HEAD BUCKET FOR 99999
COL table_owner FORMAT A15
COL table_name FORMAT A30
COL column_name FORMAT A30
COL data_type HEAD DATA_TYPE FORMAT A12
COL histogram_type HEAD HIST_TYPE FORMAT A12
COL endpoint_number FORMAT 9999999999
COL endpoint_value HEAD LOOKUP_VALUE FORMAT 9999999999
COL endpoint_actual_value FORMAT A40
COL estimated_rows FORMAT 9999999999
COL endpoint_repeat_count FORMAT 999999
COL olddensity FORMAT 0.00000000
COL newdensity FORMAT 0.00000000
WITH col AS (
SELECT
t.owner
, t.table_name
, t.num_rows
, c.column_name
, c.data_type
, c.histogram
, c.sample_size
, c.density AS OldDensity
, (c.sample_size - c.num_nulls) AS BktCnt
, c.num_distinct AS ndv
, c.num_buckets
, (c.sample_size - c.num_nulls) / c.num_buckets AS pop_bucketSize
FROM
dba_tables t
, dba_tab_columns c
WHERE
-- join
t.owner = c.owner
AND t.table_name = c.table_name
-- filter
AND UPPER(t.table_name) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
AND UPPER(t.owner) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
AND UPPER(c.column_name) = UPPER('&2')
),
hist AS (
SELECT
owner
, table_name
, column_name
, endpoint_number
, endpoint_repeat_count
, endpoint_value
, endpoint_actual_value
FROM
dba_tab_histograms
WHERE
UPPER(table_name) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
AND UPPER(owner) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
AND UPPER(column_name) = UPPER('&2')
),
bkt AS (
-- newdensity calc for a column (returns 1 row as we are looking into one column of one table)
SELECT
COUNT(*) PopValCnt
, SUM(endpoint_repeat_count) PopBktCnt
, ndv
, BktCnt
, pop_bucketSize
FROM
col
, hist
WHERE
endpoint_repeat_count > pop_bucketSize
GROUP BY
ndv
, BktCnt
, pop_bucketSize
),
nd AS (
SELECT
TRUNC(((BktCnt - PopBktCnt) / BktCnt) / (NDV - PopValCnt), 10) AS NewDensity
FROM
bkt
)
SELECT
rownum AS phys_bucket
, data_type AS data_type
, histogram AS histogram_type
, endpoint_value AS endpoint_value
, CASE
-- popular value
WHEN
histogram = 'HYBRID' AND NVL(endpoint_repeat_count, 0) > 1
THEN
ROUND(num_rows * (endpoint_repeat_count/sample_size))
-- non-popular but is an endpoint
WHEN
histogram = 'HYBRID' AND NVL(endpoint_repeat_count, 0) = 1
THEN
ROUND(num_rows * LEAST(newdensity, endpoint_repeat_count / sample_size))
-- TODO check if this condition does even exist in the wild
WHEN
histogram = 'HYBRID' AND endpoint_number IS NULL
THEN
-1
ELSE
NULL
END AS card_thisval
, TRUNC(c.num_rows * nd.newdensity) AS card_non_pop
-- , NVL(endpoint_repeat_count,0) AS endpoint_repeat_count
-- , endpoint_number AS endpoint_number
-- , endpoint_actual_value AS endpoint_actual_value
-- , olddensity AS olddensity
FROM
col c
, hist h
, nd -- single row
WHERE
c.owner = h.owner
AND c.table_name = h.table_name
AND c.column_name = h.column_name
ORDER BY
c.owner, c.table_name, c.column_name, h.endpoint_number
/