2026-03-12 20:23:15
This commit is contained in:
134
tpt/tabhisthybrid.sql
Normal file
134
tpt/tabhisthybrid.sql
Normal file
@@ -0,0 +1,134 @@
|
||||
-- script: tabhisthybrid.sql
|
||||
-- author: Tanel Poder [tanelpoder.com]
|
||||
-- created: Oct 2025
|
||||
-- usage: @tabhisthybrid [<owner>.]<table_name> <column_name>
|
||||
-- example: @tabhisthybrid soe.customers account_mgr_id
|
||||
--
|
||||
-- notes:
|
||||
-- I reused the NewDensity logic already written by Mohamed Houri (and Alberto Dell'Era, Jonathan Lewis)
|
||||
-- https://www.red-gate.com/simple-talk/databases/oracle-databases/12c-hybrid-histogram/
|
||||
--
|
||||
-- This script works with HYBRID histograms on NUMBER columns only for now.
|
||||
-- I'm hoping to unify this script and my old tabhist.sql to show estimated cardinalities
|
||||
-- (for equality filters) for all histogram types and data types someday.
|
||||
|
||||
COL phys_bucket HEAD BUCKET FOR 99999
|
||||
COL table_owner FORMAT A15
|
||||
COL table_name FORMAT A30
|
||||
COL column_name FORMAT A30
|
||||
COL data_type HEAD DATA_TYPE FORMAT A12
|
||||
COL histogram_type HEAD HIST_TYPE FORMAT A12
|
||||
COL endpoint_number FORMAT 9999999999
|
||||
COL endpoint_value HEAD LOOKUP_VALUE FORMAT 9999999999
|
||||
COL endpoint_actual_value FORMAT A40
|
||||
COL estimated_rows FORMAT 9999999999
|
||||
COL endpoint_repeat_count FORMAT 999999
|
||||
COL olddensity FORMAT 0.00000000
|
||||
COL newdensity FORMAT 0.00000000
|
||||
|
||||
WITH col AS (
|
||||
SELECT
|
||||
t.owner
|
||||
, t.table_name
|
||||
, t.num_rows
|
||||
, c.column_name
|
||||
, c.data_type
|
||||
, c.histogram
|
||||
, c.sample_size
|
||||
, c.density AS OldDensity
|
||||
, (c.sample_size - c.num_nulls) AS BktCnt
|
||||
, c.num_distinct AS ndv
|
||||
, c.num_buckets
|
||||
, (c.sample_size - c.num_nulls) / c.num_buckets AS pop_bucketSize
|
||||
FROM
|
||||
dba_tables t
|
||||
, dba_tab_columns c
|
||||
WHERE
|
||||
-- join
|
||||
t.owner = c.owner
|
||||
AND t.table_name = c.table_name
|
||||
-- filter
|
||||
AND UPPER(t.table_name) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
|
||||
AND UPPER(t.owner) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
|
||||
AND UPPER(c.column_name) = UPPER('&2')
|
||||
),
|
||||
hist AS (
|
||||
SELECT
|
||||
owner
|
||||
, table_name
|
||||
, column_name
|
||||
, endpoint_number
|
||||
, endpoint_repeat_count
|
||||
, endpoint_value
|
||||
, endpoint_actual_value
|
||||
FROM
|
||||
dba_tab_histograms
|
||||
WHERE
|
||||
UPPER(table_name) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
|
||||
AND UPPER(owner) = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
|
||||
AND UPPER(column_name) = UPPER('&2')
|
||||
),
|
||||
bkt AS (
|
||||
-- newdensity calc for a column (returns 1 row as we are looking into one column of one table)
|
||||
SELECT
|
||||
COUNT(*) PopValCnt
|
||||
, SUM(endpoint_repeat_count) PopBktCnt
|
||||
, ndv
|
||||
, BktCnt
|
||||
, pop_bucketSize
|
||||
FROM
|
||||
col
|
||||
, hist
|
||||
WHERE
|
||||
endpoint_repeat_count > pop_bucketSize
|
||||
GROUP BY
|
||||
ndv
|
||||
, BktCnt
|
||||
, pop_bucketSize
|
||||
),
|
||||
nd AS (
|
||||
SELECT
|
||||
TRUNC(((BktCnt - PopBktCnt) / BktCnt) / (NDV - PopValCnt), 10) AS NewDensity
|
||||
FROM
|
||||
bkt
|
||||
)
|
||||
SELECT
|
||||
rownum AS phys_bucket
|
||||
, data_type AS data_type
|
||||
, histogram AS histogram_type
|
||||
, endpoint_value AS endpoint_value
|
||||
, CASE
|
||||
-- popular value
|
||||
WHEN
|
||||
histogram = 'HYBRID' AND NVL(endpoint_repeat_count, 0) > 1
|
||||
THEN
|
||||
ROUND(num_rows * (endpoint_repeat_count/sample_size))
|
||||
-- non-popular but is an endpoint
|
||||
WHEN
|
||||
histogram = 'HYBRID' AND NVL(endpoint_repeat_count, 0) = 1
|
||||
THEN
|
||||
ROUND(num_rows * LEAST(newdensity, endpoint_repeat_count / sample_size))
|
||||
-- TODO check if this condition does even exist in the wild
|
||||
WHEN
|
||||
histogram = 'HYBRID' AND endpoint_number IS NULL
|
||||
THEN
|
||||
-1
|
||||
ELSE
|
||||
NULL
|
||||
END AS card_thisval
|
||||
, TRUNC(c.num_rows * nd.newdensity) AS card_non_pop
|
||||
-- , NVL(endpoint_repeat_count,0) AS endpoint_repeat_count
|
||||
-- , endpoint_number AS endpoint_number
|
||||
-- , endpoint_actual_value AS endpoint_actual_value
|
||||
-- , olddensity AS olddensity
|
||||
FROM
|
||||
col c
|
||||
, hist h
|
||||
, nd -- single row
|
||||
WHERE
|
||||
c.owner = h.owner
|
||||
AND c.table_name = h.table_name
|
||||
AND c.column_name = h.column_name
|
||||
ORDER BY
|
||||
c.owner, c.table_name, c.column_name, h.endpoint_number
|
||||
/
|
||||
Reference in New Issue
Block a user