-- script:   tabhisthybrid.sql
-- author:   Tanel Poder [tanelpoder.com]
-- created:  Oct 2025
-- usage:    @tabhisthybrid [<owner>.]<table_name> <column_name>
-- example:  @tabhisthybrid soe.customers account_mgr_id
--
-- notes:
--   I reused the NewDensity logic already written by Mohamed Houri (and Alberto Dell'Era, Jonathan Lewis)
--   https://www.red-gate.com/simple-talk/databases/oracle-databases/12c-hybrid-histogram/
--
--   This script works with HYBRID histograms on NUMBER columns only for now.
--   I'm hoping to unify this script and my old tabhist.sql to show estimated cardinalities
--   (for equality filters) for all histogram types and data types someday.
--
--   Argument 1 is [owner.]table_name (owner defaults to the current USER), argument 2 is
--   the column name. Both are matched case-insensitively.
--
--   Output columns:
--     BUCKET        - sequential number of the histogram row, in endpoint_number order
--     LOOKUP_VALUE  - endpoint_value of the histogram row
--     CARD_THISVAL  - estimated rows for an equality filter on this endpoint value
--     CARD_NON_POP  - estimated rows using NewDensity (num_rows * newdensity)

COL phys_bucket             HEAD BUCKET       FOR 99999
COL table_owner             FORMAT A15
COL table_name              FORMAT A30
COL column_name             FORMAT A30
COL data_type               HEAD DATA_TYPE    FORMAT A12
COL histogram_type          HEAD HIST_TYPE    FORMAT A12
COL endpoint_number         FORMAT 9999999999
COL endpoint_value          HEAD LOOKUP_VALUE FORMAT 9999999999
COL endpoint_actual_value   FORMAT A40
COL estimated_rows          FORMAT 9999999999
COL endpoint_repeat_count   FORMAT 999999
COL olddensity              FORMAT 0.00000000
COL newdensity              FORMAT 0.00000000

-- NOTE: no blank lines below this point until the terminating slash, as SQL*Plus
-- ends the statement buffer on a blank line unless SQLBLANKLINES is enabled
WITH col AS (
    -- table and column level statistics of the requested column
    SELECT
        t.owner
      , t.table_name
      , t.num_rows
      , c.column_name
      , c.data_type
      , c.histogram
      , c.sample_size
      , c.density                        AS olddensity
      , (c.sample_size - c.num_nulls)    AS bktcnt
      , c.num_distinct                   AS ndv
      , c.num_buckets
        -- threshold above which an endpoint_repeat_count is treated as popular
        -- NULLIF avoids ORA-01476 when num_buckets is zero
      , (c.sample_size - c.num_nulls) / NULLIF(c.num_buckets, 0) AS pop_bucketsize
    FROM
        dba_tables t
        INNER JOIN dba_tab_columns c
            ON  c.owner      = t.owner
            AND c.table_name = t.table_name
    WHERE
        UPPER(t.table_name)  = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
    AND UPPER(t.owner)       = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
    AND UPPER(c.column_name) = UPPER('&2')
),
hist AS (
    -- histogram rows of the requested column
    SELECT
        owner
      , table_name
      , column_name
      , endpoint_number
      , endpoint_repeat_count
      , endpoint_value
      , endpoint_actual_value
    FROM
        dba_tab_histograms
    WHERE
        UPPER(table_name)  = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',INSTR('&1','.')+1) ELSE '&1' END)
    AND UPPER(owner)       = UPPER(CASE WHEN INSTR('&1','.')>0 THEN SUBSTR('&1',1,INSTR('&1','.')-1) ELSE USER END)
    AND UPPER(column_name) = UPPER('&2')
),
bkt AS (
    -- inputs of the newdensity calc, one row per matched column
    -- Popular values are counted with conditional aggregation instead of a WHERE filter,
    -- so a histogram without any popular value still yields a row (popvalcnt = 0,
    -- popbktcnt = 0) rather than making the whole report come back empty
    SELECT
        c.owner
      , c.table_name
      , c.column_name
      , COUNT(CASE WHEN h.endpoint_repeat_count > c.pop_bucketsize THEN 1 END) AS popvalcnt
      , NVL(SUM(CASE WHEN h.endpoint_repeat_count > c.pop_bucketsize
                     THEN h.endpoint_repeat_count END), 0)                     AS popbktcnt
      , c.ndv
      , c.bktcnt
      , c.pop_bucketsize
    FROM
        col c
        INNER JOIN hist h
            ON  h.owner       = c.owner
            AND h.table_name  = c.table_name
            AND h.column_name = c.column_name
    GROUP BY
        c.owner
      , c.table_name
      , c.column_name
      , c.ndv
      , c.bktcnt
      , c.pop_bucketsize
),
nd AS (
    -- newdensity = share of non-popular sample rows / number of non-popular distinct values
    -- NULLIF yields NULL instead of ORA-01476 when every distinct value is popular
    -- (ndv = popvalcnt) or when the non-null sample is empty (bktcnt = 0)
    SELECT
        owner
      , table_name
      , column_name
      , TRUNC(((bktcnt - popbktcnt) / NULLIF(bktcnt, 0)) / NULLIF(ndv - popvalcnt, 0), 10) AS newdensity
    FROM
        bkt
)
SELECT
    -- ROW_NUMBER instead of ROWNUM: ROWNUM is assigned before the ORDER BY is applied,
    -- so it does not reliably follow the endpoint_number ordering of the output
    ROW_NUMBER() OVER (ORDER BY c.owner, c.table_name, c.column_name, h.endpoint_number) AS phys_bucket
  , c.data_type                                   AS data_type
  , c.histogram                                   AS histogram_type
  , h.endpoint_value                              AS endpoint_value
  , CASE
        -- popular value
        WHEN c.histogram = 'HYBRID' AND NVL(h.endpoint_repeat_count, 0) > 1
        THEN ROUND(c.num_rows * (h.endpoint_repeat_count / c.sample_size))
        -- non-popular but is an endpoint
        -- NOTE(review): the referenced article appears to use GREATEST(newdensity, erc / sample size)
        -- for this case, confirm whether LEAST is intended here
        WHEN c.histogram = 'HYBRID' AND NVL(h.endpoint_repeat_count, 0) = 1
        THEN ROUND(c.num_rows * LEAST(nd.newdensity, h.endpoint_repeat_count / c.sample_size))
        -- TODO check if this condition does even exist in the wild
        WHEN c.histogram = 'HYBRID' AND h.endpoint_number IS NULL
        THEN -1
        ELSE NULL
    END                                           AS card_thisval
  , TRUNC(c.num_rows * nd.newdensity)             AS card_non_pop
--  , NVL(h.endpoint_repeat_count,0)              AS endpoint_repeat_count
--  , h.endpoint_number                           AS endpoint_number
--  , h.endpoint_actual_value                     AS endpoint_actual_value
--  , c.olddensity                                AS olddensity
FROM
    col c
    INNER JOIN hist h
        ON  h.owner       = c.owner
        AND h.table_name  = c.table_name
        AND h.column_name = c.column_name
    INNER JOIN nd
        ON  nd.owner       = c.owner
        AND nd.table_name  = c.table_name
        AND nd.column_name = c.column_name
ORDER BY
    c.owner
  , c.table_name
  , c.column_name
  , h.endpoint_number
/