135 lines
4.4 KiB
SQL
135 lines
4.4 KiB
SQL
-- script: tabhisthybrid.sql
|
|
-- author: Tanel Poder [tanelpoder.com]
|
|
-- created: Oct 2025
|
|
-- usage: @tabhisthybrid [<owner>.]<table_name> <column_name>
|
|
-- example: @tabhisthybrid soe.customers account_mgr_id
|
|
--
|
|
-- notes:
|
|
-- I reused the NewDensity logic already written by Mohamed Houri (and Alberto Dell'Era, Jonathan Lewis)
|
|
-- https://www.red-gate.com/simple-talk/databases/oracle-databases/12c-hybrid-histogram/
|
|
--
|
|
-- This script works with HYBRID histograms on NUMBER columns only for now.
|
|
-- I'm hoping to unify this script and my old tabhist.sql to show estimated cardinalities
|
|
-- (for equality filters) for all histogram types and data types someday.
|
|
|
|
COL phys_bucket HEAD BUCKET FOR 99999
|
|
COL table_owner FORMAT A15
|
|
COL table_name FORMAT A30
|
|
COL column_name FORMAT A30
|
|
COL data_type HEAD DATA_TYPE FORMAT A12
|
|
COL histogram_type HEAD HIST_TYPE FORMAT A12
|
|
COL endpoint_number FORMAT 9999999999
|
|
COL endpoint_value HEAD LOOKUP_VALUE FORMAT 9999999999
|
|
COL endpoint_actual_value FORMAT A40
|
|
COL estimated_rows FORMAT 9999999999
|
|
COL endpoint_repeat_count FORMAT 999999
|
|
COL olddensity FORMAT 0.00000000
|
|
COL newdensity FORMAT 0.00000000
|
|
|
|
WITH col AS (
|
|
SELECT
|
|
t.owner
|
|
, t.table_name
|
|
, t.num_rows
|
|
, c.column_name
|
|
, c.data_type
|
|
, c.histogram
|
|
, c.sample_size
|
|
, c.density AS OldDensity
|
|
, (c.sample_size - c.num_nulls) AS BktCnt
|
|
, c.num_distinct AS ndv
|
|
, c.num_buckets
|
|
, (c.sample_size - c.num_nulls) / c.num_buckets AS pop_bucketSize
|
|
FROM
|
|
dba_tables t
|
|
, dba_tab_columns c
|
|
WHERE
|
|
-- join
|
|
t.owner = c.owner
|
|
AND t.table_name = c.table_name
|
|
-- filter
|
|
AND UPPER(t.table_name) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
|
|
AND UPPER(t.owner) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
|
|
AND UPPER(c.column_name) = UPPER('&2')
|
|
),
|
|
hist AS (
|
|
SELECT
|
|
owner
|
|
, table_name
|
|
, column_name
|
|
, endpoint_number
|
|
, endpoint_repeat_count
|
|
, endpoint_value
|
|
, endpoint_actual_value
|
|
FROM
|
|
dba_tab_histograms
|
|
WHERE
|
|
UPPER(table_name) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
|
|
AND UPPER(owner) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
|
|
AND UPPER(column_name) = UPPER('&2')
|
|
),
|
|
bkt AS (
|
|
-- newdensity calc for a column (returns 1 row as we are looking into one column of one table)
|
|
SELECT
|
|
COUNT(*) PopValCnt
|
|
, SUM(endpoint_repeat_count) PopBktCnt
|
|
, ndv
|
|
, BktCnt
|
|
, pop_bucketSize
|
|
FROM
|
|
col
|
|
, hist
|
|
WHERE
|
|
endpoint_repeat_count > pop_bucketSize
|
|
GROUP BY
|
|
ndv
|
|
, BktCnt
|
|
, pop_bucketSize
|
|
),
|
|
nd AS (
|
|
SELECT
|
|
TRUNC(((BktCnt - PopBktCnt) / BktCnt) / (NDV - PopValCnt), 10) AS NewDensity
|
|
FROM
|
|
bkt
|
|
)
|
|
SELECT
|
|
rownum AS phys_bucket
|
|
, data_type AS data_type
|
|
, histogram AS histogram_type
|
|
, endpoint_value AS endpoint_value
|
|
, CASE
|
|
-- popular value
|
|
WHEN
|
|
histogram = 'HYBRID' AND NVL(endpoint_repeat_count, 0) > 1
|
|
THEN
|
|
ROUND(num_rows * (endpoint_repeat_count/sample_size))
|
|
-- non-popular but is an endpoint
|
|
WHEN
|
|
histogram = 'HYBRID' AND NVL(endpoint_repeat_count, 0) = 1
|
|
THEN
|
|
ROUND(num_rows * LEAST(newdensity, endpoint_repeat_count / sample_size))
|
|
-- TODO check if this condition does even exist in the wild
|
|
WHEN
|
|
histogram = 'HYBRID' AND endpoint_number IS NULL
|
|
THEN
|
|
-1
|
|
ELSE
|
|
NULL
|
|
END AS card_thisval
|
|
, TRUNC(c.num_rows * nd.newdensity) AS card_non_pop
|
|
-- , NVL(endpoint_repeat_count,0) AS endpoint_repeat_count
|
|
-- , endpoint_number AS endpoint_number
|
|
-- , endpoint_actual_value AS endpoint_actual_value
|
|
-- , olddensity AS olddensity
|
|
FROM
|
|
col c
|
|
, hist h
|
|
, nd -- single row
|
|
WHERE
|
|
c.owner = h.owner
|
|
AND c.table_name = h.table_name
|
|
AND c.column_name = h.column_name
|
|
ORDER BY
|
|
c.owner, c.table_name, c.column_name, h.endpoint_number
|
|
/
|