美文网首页
【SQL】抽样

【SQL】抽样

作者: 7ccc099f4608 | 来源:发表于2020-11-24 17:02 被阅读0次

随机采样


--获取10%的随机样本数据
SELECT t.*
FROM <t> t
WHERE RAND() < 0.1

分层采样

hash 版

select 
    bins, score, rank1, st_count
from 
(
    select 
        round(score, 3) as bins, 
        score,
        count(*) over (partition by hash(level)  ) as st_count, 
        rank() over (partition by hash(level) order by rand()) as rank1
    FROM 
        t_table
) A 
where rank1 <= 0.1 * st_count; 

非hash 版

select 
    bins, score, rank1, st_count
from 
(
    select 
        round(score, 3) as bins, 
        score,
        count(*) over (partition by level  ) as st_count, 
        rank() over (partition by level order by rand()) as rank1
    FROM 
        t_table
) A 
where rank1 <= 0.1 * st_count; 

相关文章

网友评论

      本文标题:【SQL】抽样

      本文链接:https://www.haomeiwen.com/subject/tooiiktx.html