TEST01数据来自DBA_OBJECTS重复512次共4500W行占用5G存储空间create table test_a as select rowid as rid,a.* from test01 a; create table test_b as select rowid as rid,a.* from test01 a; begin DBMS_STATS.GATHER_TABLE_STATS(ownname SCOTT, tabname TEST_A, estimate_percent 100, ---oracle改成100崖山改成1 method_opt for all columns size auto, no_invalidate FALSE, degree 6, granularity ALL, cascade TRUE); end; / begin DBMS_STATS.GATHER_TABLE_STATS(ownname SCOTT, tabname TEST_B, estimate_percent 100, ---oracle改成100崖山改成1 method_opt for all columns size auto, no_invalidate FALSE, degree 6, granularity ALL, cascade TRUE); end; /HASH JOIN场景太多了有内连接外连接全外连接半连接反连接1个列关联多个列关联JOIN非等值关联过滤小表JOIN大表大表JOIN大表1:1关联1:N关联N:N关联以及各种不同数据类型列关联等等这么多场景根本测不完我这里选择大表JOIN大表1:1关联关联列使用ROWID类型也算是具有代表性了崖山23.5.1相关参数设置重启DB后生效alter system set DATA_BUFFER_SIZE16G scopespfile; ---BUFFER CACHE设置为16G alter system set _HOT_BUFFER_PCT25 scopespfile; ---热块占用比例设置为25%使得全表扫描后表能被CACHE在内存中 alter system set MEX_POOL_SIZE8G scopespfile; ---批量模式HASH JOIN,HASH GROUP BY,ORDER专用内存 alter system set VM_BUFFER_SIZE8G scopespfile; ---行存表ORDER BY,HASH JOIN存放中间结果集临时缓存Oracle19c设置SGA16G,PGA16G修改直接路径读参数neveralter system set _serial_direct_readnever;测试SQL共4条测试Oracle19c物理机Oracle19c虚拟机崖山23.5.1虚拟机单行模式崖山23.5.1虚拟机批量模式4条SQL4种场景性能表现注意事项1.崖山23.5.1版本中批量模式需要添加HINT batch_mode(2)2.崖山并行不能直接parallel(4)需要对表设置并行度(行为和Oracle10g相似)另外崖山并行必须使用批量模式完整HINT parallel(a,4) parallel(b,4) batch_mode(2)测试SQL如下(崖山批量模式并行需要添加HINT)select count(*) from test_a a, test_b b where a.rid b.rid; select count(distinct a.object_id),count(distinct b.object_name) from test_a a, test_b b where a.rid b.rid; select count(distinct a.owner), count(distinct a.object_name), count(distinct a.subobject_name), count(distinct a.object_id), count(distinct a.data_object_id), count(distinct a.object_type), count(distinct a.created), count(distinct a.last_ddl_time), count(distinct a.timestamp), count(distinct a.status), count(distinct a.temporary), count(distinct a.generated), count(distinct a.secondary), count(distinct a.namespace), count(distinct a.edition_name), count(distinct b.owner), count(distinct b.object_name), count(distinct b.subobject_name), count(distinct b.object_id), count(distinct b.data_object_id), count(distinct b.object_type), count(distinct b.created), count(distinct b.last_ddl_time), count(distinct b.timestamp), count(distinct b.status), count(distinct b.temporary), count(distinct b.generated), count(distinct b.secondary), count(distinct b.namespace), count(distinct b.edition_name) from test_a a, test_b b where a.rid b.rid; select /* parallel(4) */ count(distinct a.owner), count(distinct a.object_name), count(distinct a.subobject_name), count(distinct a.object_id), count(distinct a.data_object_id), count(distinct a.object_type), count(distinct a.created), count(distinct a.last_ddl_time), count(distinct a.timestamp), count(distinct a.status), count(distinct a.temporary), count(distinct a.generated), count(distinct a.secondary), count(distinct a.namespace), count(distinct a.edition_name), count(distinct b.owner), count(distinct b.object_name), count(distinct b.subobject_name), count(distinct b.object_id), count(distinct b.data_object_id), count(distinct b.object_type), count(distinct b.created), count(distinct b.last_ddl_time), count(distinct b.timestamp), count(distinct b.status), count(distinct b.temporary), count(distinct b.generated), count(distinct b.secondary), count(distinct b.namespace), count(distinct b.edition_name) from test_a a, test_b b where a.rid b.rid;测试结果测试结果有点吓人Oracle咋这么挫应该是遇到了Oracle19c PGA memory operation BUG了针对本次测试场景Oracle19c是慢在单个SESSION PGA内存使用限制上了本次测试PGA16G_pga_max_size默认2G_smm_max_size默认1G)Oracle要想设置较大work area size需要设置较大的pga_aggregate_target调整隐藏参数_pga_max_size设置完之后会自动设置_smm_max_size我设置pga_aggregate_target32G_pga_max_size16G_smm_max_size被自动设置为了6.399G但是这样设置之后有些SQL反而更慢 一直在等PGA memory operation实锤19c是遇到BUG了要打补丁。后面用Oracle26ai重新进行了测试4条SQL耗时如下_pga_max_size2G_smm_max_size自动设置为1G 00:00:17.63 00:00:26.82 00:02:07.79 00:00:34.08 _pga_max_size4G_smm_max_size自动设置为2G 00:00:16.11 00:00:24.30 00:02:04.30 00:00:25.67 _pga_max_size8G_smm_max_size自动设置为4G 00:00:13.95 00:00:20.25 00:01:55.14 00:00:26.76 _pga_max_size16G_smm_max_size自动设置为6.399G 00:00:13.48 00:00:20.44 00:01:48.90 00:00:25.60我就不更正测试结果的表格了各位看官老爷知道就行了崖山VM_BUFFER_SIZE8GMEX_POOL_SIZE8G崖山不需要像Oracle那样修改隐含参数没对单个会话使用私有内存做限制最后给崖山提个小建议对SQL开启并行能否别搞这么复杂 parallel(a,4) parallel(b,4) batch_mode(2)直接parallel(4)最近有很多小伙伴问我为什么我最近几个月一直在测试崖山那是因为1.崖山有RAC能够满足核心系统高可用要求(核心系统要么分布式要么RAC不可能用主备)2.崖山基础算子性能强配合使用批量模式很多场景性能比Oracle还强3.崖山没有PG系顽疾表膨胀索引膨胀4.崖山事务行为和Oracle一致(PG系数据库事务行为和Oracle有些差异)5.体系结构高度相似Oracle学习成本低6.本人搞性能优化出身测试完崖山性能再对比其他国产数据库直摇头7.崖山不是在PGMySQL上做二次研发PGMySQL上限已被锁死对于一款真正用心在做研发的国产数据库我们做的各种测试也是帮崖山能更快的打磨好各种细节希望良币驱逐劣币
测试Oracle-崖山HASH JOIN性能
TEST01数据来自DBA_OBJECTS重复512次共4500W行占用5G存储空间create table test_a as select rowid as rid,a.* from test01 a; create table test_b as select rowid as rid,a.* from test01 a; begin DBMS_STATS.GATHER_TABLE_STATS(ownname SCOTT, tabname TEST_A, estimate_percent 100, ---oracle改成100崖山改成1 method_opt for all columns size auto, no_invalidate FALSE, degree 6, granularity ALL, cascade TRUE); end; / begin DBMS_STATS.GATHER_TABLE_STATS(ownname SCOTT, tabname TEST_B, estimate_percent 100, ---oracle改成100崖山改成1 method_opt for all columns size auto, no_invalidate FALSE, degree 6, granularity ALL, cascade TRUE); end; /HASH JOIN场景太多了有内连接外连接全外连接半连接反连接1个列关联多个列关联JOIN非等值关联过滤小表JOIN大表大表JOIN大表1:1关联1:N关联N:N关联以及各种不同数据类型列关联等等这么多场景根本测不完我这里选择大表JOIN大表1:1关联关联列使用ROWID类型也算是具有代表性了崖山23.5.1相关参数设置重启DB后生效alter system set DATA_BUFFER_SIZE16G scopespfile; ---BUFFER CACHE设置为16G alter system set _HOT_BUFFER_PCT25 scopespfile; ---热块占用比例设置为25%使得全表扫描后表能被CACHE在内存中 alter system set MEX_POOL_SIZE8G scopespfile; ---批量模式HASH JOIN,HASH GROUP BY,ORDER专用内存 alter system set VM_BUFFER_SIZE8G scopespfile; ---行存表ORDER BY,HASH JOIN存放中间结果集临时缓存Oracle19c设置SGA16G,PGA16G修改直接路径读参数neveralter system set _serial_direct_readnever;测试SQL共4条测试Oracle19c物理机Oracle19c虚拟机崖山23.5.1虚拟机单行模式崖山23.5.1虚拟机批量模式4条SQL4种场景性能表现注意事项1.崖山23.5.1版本中批量模式需要添加HINT batch_mode(2)2.崖山并行不能直接parallel(4)需要对表设置并行度(行为和Oracle10g相似)另外崖山并行必须使用批量模式完整HINT parallel(a,4) parallel(b,4) batch_mode(2)测试SQL如下(崖山批量模式并行需要添加HINT)select count(*) from test_a a, test_b b where a.rid b.rid; select count(distinct a.object_id),count(distinct b.object_name) from test_a a, test_b b where a.rid b.rid; select count(distinct a.owner), count(distinct a.object_name), count(distinct a.subobject_name), count(distinct a.object_id), count(distinct a.data_object_id), count(distinct a.object_type), count(distinct a.created), count(distinct a.last_ddl_time), count(distinct a.timestamp), count(distinct a.status), count(distinct a.temporary), count(distinct a.generated), count(distinct a.secondary), count(distinct a.namespace), count(distinct a.edition_name), count(distinct b.owner), count(distinct b.object_name), count(distinct b.subobject_name), count(distinct b.object_id), count(distinct b.data_object_id), count(distinct b.object_type), count(distinct b.created), count(distinct b.last_ddl_time), count(distinct b.timestamp), count(distinct b.status), count(distinct b.temporary), count(distinct b.generated), count(distinct b.secondary), count(distinct b.namespace), count(distinct b.edition_name) from test_a a, test_b b where a.rid b.rid; select /* parallel(4) */ count(distinct a.owner), count(distinct a.object_name), count(distinct a.subobject_name), count(distinct a.object_id), count(distinct a.data_object_id), count(distinct a.object_type), count(distinct a.created), count(distinct a.last_ddl_time), count(distinct a.timestamp), count(distinct a.status), count(distinct a.temporary), count(distinct a.generated), count(distinct a.secondary), count(distinct a.namespace), count(distinct a.edition_name), count(distinct b.owner), count(distinct b.object_name), count(distinct b.subobject_name), count(distinct b.object_id), count(distinct b.data_object_id), count(distinct b.object_type), count(distinct b.created), count(distinct b.last_ddl_time), count(distinct b.timestamp), count(distinct b.status), count(distinct b.temporary), count(distinct b.generated), count(distinct b.secondary), count(distinct b.namespace), count(distinct b.edition_name) from test_a a, test_b b where a.rid b.rid;测试结果测试结果有点吓人Oracle咋这么挫应该是遇到了Oracle19c PGA memory operation BUG了针对本次测试场景Oracle19c是慢在单个SESSION PGA内存使用限制上了本次测试PGA16G_pga_max_size默认2G_smm_max_size默认1G)Oracle要想设置较大work area size需要设置较大的pga_aggregate_target调整隐藏参数_pga_max_size设置完之后会自动设置_smm_max_size我设置pga_aggregate_target32G_pga_max_size16G_smm_max_size被自动设置为了6.399G但是这样设置之后有些SQL反而更慢 一直在等PGA memory operation实锤19c是遇到BUG了要打补丁。后面用Oracle26ai重新进行了测试4条SQL耗时如下_pga_max_size2G_smm_max_size自动设置为1G 00:00:17.63 00:00:26.82 00:02:07.79 00:00:34.08 _pga_max_size4G_smm_max_size自动设置为2G 00:00:16.11 00:00:24.30 00:02:04.30 00:00:25.67 _pga_max_size8G_smm_max_size自动设置为4G 00:00:13.95 00:00:20.25 00:01:55.14 00:00:26.76 _pga_max_size16G_smm_max_size自动设置为6.399G 00:00:13.48 00:00:20.44 00:01:48.90 00:00:25.60我就不更正测试结果的表格了各位看官老爷知道就行了崖山VM_BUFFER_SIZE8GMEX_POOL_SIZE8G崖山不需要像Oracle那样修改隐含参数没对单个会话使用私有内存做限制最后给崖山提个小建议对SQL开启并行能否别搞这么复杂 parallel(a,4) parallel(b,4) batch_mode(2)直接parallel(4)最近有很多小伙伴问我为什么我最近几个月一直在测试崖山那是因为1.崖山有RAC能够满足核心系统高可用要求(核心系统要么分布式要么RAC不可能用主备)2.崖山基础算子性能强配合使用批量模式很多场景性能比Oracle还强3.崖山没有PG系顽疾表膨胀索引膨胀4.崖山事务行为和Oracle一致(PG系数据库事务行为和Oracle有些差异)5.体系结构高度相似Oracle学习成本低6.本人搞性能优化出身测试完崖山性能再对比其他国产数据库直摇头7.崖山不是在PGMySQL上做二次研发PGMySQL上限已被锁死对于一款真正用心在做研发的国产数据库我们做的各种测试也是帮崖山能更快的打磨好各种细节希望良币驱逐劣币