Files
notes/histograms/histogram_02.txt
2026-03-12 22:01:38 +01:00

253 lines
5.6 KiB
Plaintext

drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
decode(mod(rownum,10),0,10,1) col1
from ( select 1 just_a_column
from DUAL
connect by level <= 100000
)
/
---------
drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
decode(mod(rownum,3),0,'m3',
decode(mod(rownum,5),0,'m5',
decode(mod(rownum,7),0,'m7',
decode(mod(rownum,11),0,'m11',
decode(mod(rownum,13),0,'m13',
decode(mod(rownum,17),0,'m17',
'other')))))) col1
from ( select 1 just_a_column
from DUAL
connect by level <= 100000
)
/
------------
drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
case when rownum<=10 then rownum else 99999 end col1,
case when rownum<=400 then rownum else 99999 end col2,
case when rownum<=4000 then rownum else 99999 end col3,
case when rownum<=10000 then rownum else 99999 end col4
from ( select 1 just_a_column
from DUAL
connect by level <= 100000
)
/
---------
drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
case when rownum>=1 and rownum<1000 then mod(rownum,10) else 99999 end col1,
case when rownum>=1 and rownum<99900 then mod(rownum,1000) else rownum end col2,
mod(rownum,300) col3
from ( select 1 just_a_column
from DUAL
connect by level <= 100000
)
/
---------
drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
mod(rownum,254) col1,
mod(rownum,255) col2,
mod(rownum,256) col3
from ( select 1 just_a_column
from DUAL
connect by level <= 100000
)
/
exec dbms_stats.gather_table_stats(user,'T1', method_opt=>'for all columns size SKEWONLY');
select column_name,num_distinct,density,num_nulls,num_buckets,sample_size,histogram
from user_tab_col_statistics
where table_name='T1';
select endpoint_value as column_value,
endpoint_number as cummulative_frequency,
endpoint_number - lag(endpoint_number,1,0) over (order by endpoint_number) as frequency
from user_tab_histograms
where table_name = 'T1' and column_name = 'COL4';
select col1,count(*) from T1 group by col1 order by 2 desc;
--------------------
https://www.red-gate.com/simple-talk/databases/oracle-databases/12c-histogram-top-frequency/
drop table T_TopFreq purge;
create table T_TopFreq as
select
rownum n1
, case when mod(rownum, 100000) = 0 then 90
when mod(rownum, 10000) = 0 then 180
when mod(rownum, 1000) = 0 then 84
when mod(rownum, 100) = 0 then 125
when mod(rownum,50) = 2 then 7
when mod(rownum-1,80) = 2 then 22
when mod(rownum, 10) = 0 then 19
when mod(rownum-1,10) = 5 then 15
when mod(rownum-1,5) = 1 then 11
when trunc((rownum -1/3)) < 5 then 25
when trunc((rownum -1/5)) < 20 then 33
else 42
end n2
from dual
connect by level <= 2e2
/
set serveroutput ON
exec dbms_stats.set_global_prefs ('TRACE', to_char (1+16));
exec dbms_stats.gather_table_stats (user,'T_TOPFREQ',method_opt=> 'for columns n2 size 8');
exec dbms_stats.set_global_prefs('TRACE', null);
select
sum (cnt) TopNRows
from (select
n2
,count(*) cnt
from t_topfreq
group by n2
order by count(*) desc
)
where rownum <= 8;
with FREQ as
( select
n2
,count(*) cnt
from t_topfreq
group by n2
order by count(*) desc
)
select sum(cnt) from FREQ where rownum<=8;
select column_name,num_distinct,density,num_nulls,num_buckets,sample_size,histogram
from user_tab_col_statistics
where table_name='T_TOPFREQ';
--------------------------------------------------------------
drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
mod(rownum,300) col1
from ( select 1 just_a_column
from DUAL
connect by level <= 100e3
)
/
update T1 set col1=567 where id between 70e3 and 75e3;
update T1 set col1=678 where id between 75e3 and 90e3;
update T1 set col1=789 where id between 90e3 and 100e3;
exec dbms_stats.gather_table_stats(user,'T1', method_opt=>'for all columns size SKEWONLY');
-- type de histogram
select column_name,num_distinct,density,num_nulls,num_buckets,sample_size,histogram
from user_tab_col_statistics
where table_name='T1';
-- how many rows are in the TOP-N values ?
with FREQ as
( select
col1
,count(*) cnt
from T1
group by col1
order by count(*) desc
)
select sum(cnt) from FREQ where rownum<=254
;
-- frequency by column value / bucket
select endpoint_value as column_value,
endpoint_number as cummulative_frequency,
endpoint_number - lag(endpoint_number,1,0) over (order by endpoint_number) as frequency,
ENDPOINT_REPEAT_COUNT
from user_tab_histograms
where table_name = 'T1' and column_name = 'COL1';
--------------------------------------------------------------
--------------------------------------------------------------
drop table T1 purge;
create table T1 tablespace TS1 as
select
rownum id,
mod(rownum,2000) col1
from ( select 1 just_a_column
from DUAL
connect by level <= 1000e3
)
/
exec dbms_stats.gather_table_stats(user,'T1', method_opt=>'for all columns size 2048');
-- type de histogram
select column_name,num_distinct,density,num_nulls,num_buckets,sample_size,histogram
from user_tab_col_statistics
where table_name='T1';