Skip to content
This repository has been archived by the owner on Jan 4, 2021. It is now read-only.

性能优化测试.sql #72

Open
OpenMIS opened this issue Mar 22, 2019 · 0 comments
Open

性能优化测试.sql #72

OpenMIS opened this issue Mar 22, 2019 · 0 comments

Comments

@OpenMIS
Copy link

OpenMIS commented Mar 22, 2019

--创建临时表
create temp table foo(a int, b int, c int, d int, e int);
--随机200万条记录
insert into foo select random()*100, random()*100, random()*100, random()*100, random()*100 from generate_series(1,2000000);

--使用内置的C语言实现的方法
select count(*) filter (where a = least(a,b,c,d,e)) from foo;
--execution time: 531 ms; total time: 547 ms

CREATE FUNCTION min2(integer,integer,integer,integer,integer) RETURNS integer
AS 'DllTest2', 'min2'
LANGUAGE C STRICT;

--使用C语言写的扩展实现的方法
select count(*) filter (where a = min2(a,b,c,d,e)) from foo;
--execution time: 531 ms; total time: 546 ms

--使用pgsql调用内置的C语言实现的least函数
CREATE OR REPLACE FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer )
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare

begin
return least(a,b,c,d,e);
end;
$function$;

--执行测试函数
select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo;
--execution time: 2.672 sec; total time: 2.688 sec

--使用pgsql,在里面使用sql。【注意】此种性能非常差,不能这么用。
CREATE OR REPLACE FUNCTION public.myleast2(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
return (select min(v) from unnest($1) g(v));
end;
$function$

--执行测试函数
select count(*) filter (where a = myleast2(a,b,c,d,e)) from foo;
--execution time: 30.813 sec; total time: 30.813 sec

--使用单语句的SQL,【注意】性能比上面好点,但也非常差。
CREATE OR REPLACE FUNCTION public.myleast3(VARIADIC integer[])
RETURNS integer LANGUAGE sql IMMUTABLE STRICT
AS $function$select min(v) from unnest($1) g(v)$function$

--执行测试函数
select count(*) filter (where a = myleast3(a,b,c,d,e)) from foo;
--execution time: 20.031 sec; total time: 20.031 sec

--使用pgsql
CREATE OR REPLACE FUNCTION public.myleast1(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
foreach a in array $1
loop
if result is null then
result := a;
elseif a < result then
result := a;
end if;
end loop;
return result;
end;
$function$;

--执行测试函数
select count(*) filter (where a = myleast1(a,b,c,d,e)) from foo;
--execution time: 8.000 sec; total time: 8.016 sec

--使用pgsql调用内置的C语言实现的least函数
CREATE OR REPLACE FUNCTION public.myleast1a(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
begin
return least($1);
end;
$function$;

--执行测试函数
select count(*) filter (where a = myleast1a(a,b,c,d,e)) from foo;

--使用pgsql,减少if语句
CREATE OR REPLACE FUNCTION public.myleast1b(VARIADIC integer[])
RETURNS integer
LANGUAGE plpgsql
IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
foreach a in array $1
loop
if result is null or a < result then
result := a;
end if;
end loop;
return result;
end;
$function$;

--执行测试函数
select count(*) filter (where a = myleast1b(a,b,c,d,e)) from foo;
--execution time: 7.187 sec; total time: 7.187 sec

--使用pgsql,使用case替代if语句
CREATE OR REPLACE FUNCTION public.myleast1c(VARIADIC integer[])
RETURNS integer LANGUAGE plpgsql IMMUTABLE STRICT
AS $function$
declare result int;
a int;
begin
foreach a in array $1
loop
result := case when a < result then a else coalesce(result, a) end;
end loop;
return result;
end;
$function$;

--执行测试函数
select count(*) filter (where a = myleast1c(a,b,c,d,e)) from foo;
--execution time: 6.687 sec; total time: 6.703 sec

--以下都是使用python 3.6.5 64位 windows 环境
--纯python数据库函数
CREATE OR REPLACE FUNCTION public.myleast6(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$

r = None
for x in a:
if r is None or x < r:
r = x
return r
$function$

--执行测试函数
select count(*) filter (where a = myleast6(a,b,c,d,e)) from foo;
--execution time: 7.344 sec; total time: 7.344 sec

--纯python数据库函数,在内部使用jit
--【注意】在数据库函数里直接使用jit,如果在高压调用此数据库函数时会非常慢,因为每次执行都会调用jit。
CREATE OR REPLACE FUNCTION public.myleast6a(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$

from numba import jit

@jit
def test2(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r

return test2(a)
$function$

----执行测试函数
select count(*) filter (where a = myleast6a(a,b,c,d,e)) from foo;
--8分钟还未计算完,强制终止掉上面测试


#python模块: module1
#导入jit
from numba import jit

@jit
def test2(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r

def test3(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r

def test4(b):
return min(b)

--python数据库函数调用python模块使用jit
CREATE OR REPLACE FUNCTION public.myleast6b(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$

import module1
return module1.test2(a)
$function$;

----执行测试函数
select count(*) filter (where a = myleast6b(a,b,c,d,e)) from foo;
--execution time: 00:01:07; total time: 00:01:07

--python数据库函数调用python模块不使用jit
CREATE OR REPLACE FUNCTION public.myleast6c(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$

import module1
return module1.test3(a)
$function$;

----执行测试函数
select count(*) filter (where a = myleast6c(a,b,c,d,e)) from foo;
--execution time: 9.672 sec; total time: 9.688 sec

--python数据库函数调用python模块直接使用内部min函数,不使用jit
CREATE OR REPLACE FUNCTION public.myleast6d(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
import module1
return module1.test4(a)
$function$;

----执行测试函数
select count(*) filter (where a = myleast6d(a,b,c,d,e)) from foo;
--execution time: 10.609 sec; total time: 10.625 sec

--python数据库函数调用python模块直接使用内部min函数,不使用jit
CREATE OR REPLACE FUNCTION public.myleast6e(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$
return min(a)
$function$;

----执行测试函数
select count(*) filter (where a = myleast6e(a,b,c,d,e)) from foo;
--execution time: 7.906 sec; total time: 7.922 sec

--使用python调用内置的C语言实现的least函数,【注意】此方式存在性能问题。
CREATE OR REPLACE FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer )
RETURNS integer LANGUAGE plpython3u
AS $function$
if "plan" in SD:
plan = SD["plan"]
else:
plan = plpy.prepare("SELECT least($1, $2,$3,$4,$5) as f1", ["int", "int", "int", "int", "int"])
SD["plan"] = plan

rv = plpy.execute(plan, [a,b,c,d,e], 1)
t = rv[0]["f1"]
return t
$function$;

--执行测试函数
select count(*) filter (where a = myleast6f(a,b,c,d,e)) from foo;
--execution time: 34.141 sec; total time: 34.141 sec

--使用GD对象缓存jit的函数
--【注意】此方法不用每次执行调用jit,但也说明jit不适合非科学计算领域,比常规方式速度反而下降非常多。
CREATE OR REPLACE FUNCTION public.myleast6g(VARIADIC a integer[])
RETURNS integer LANGUAGE plpython3u
AS $function$

if "test2" in GD:
test2 = GD["test2"]
else :
from numba import jit

@jit
def test2(b):
r = None
for x in b:
if r is None or x < r:
r = x
return r
GD["test2"] = test2

return test2(a)
$function$

--执行测试函数
select count(*) filter (where a = myleast6g(a,b,c,d,e)) from foo;
--execution time: 00:01:02; total time: 00:01:02

--#import imp
--#imp.reload(module1)

CREATE or replace FUNCTION myleast7_lua(a int, b int, c int, d int, e int) RETURNS int AS $$
return math.min(a,b,c,d,e);
$$ LANGUAGE pllua;

select count(*) filter (where a = myleast7_lua(a,b,c,d,e)) from foo;
--execution time: 8.851

--删除测试函数
drop FUNCTION public.myleast1(VARIADIC integer[]);
drop FUNCTION public.myleast1a(a integer, b integer, c integer, d integer, e integer)
drop FUNCTION public.myleast1b(VARIADIC integer[]);
drop FUNCTION public.myleast1c(VARIADIC integer[]);
drop FUNCTION public.myleast2(VARIADIC integer[]);
drop FUNCTION public.myleast3(VARIADIC integer[]);
drop FUNCTION public.myleast6(VARIADIC a integer[]);
drop FUNCTION public.myleast6a(VARIADIC a integer[]);
drop FUNCTION public.myleast6b(VARIADIC a integer[]);
drop FUNCTION public.myleast6c(VARIADIC a integer[]);
drop FUNCTION public.myleast6d(VARIADIC a integer[]);
drop FUNCTION public.myleast6e(VARIADIC a integer[]);
drop FUNCTION public.myleast6f(a integer, b integer, c integer, d integer, e integer);
drop FUNCTION public.myleast6g(VARIADIC a integer[]);
drop FUNCTION public.myleast7_lua(a integer, b integer, c integer, d integer, e integer);

--删除临时表
drop table foo;

/*
【结论】
1)C语言实现的函数最快远超过其他语言,
需要时间0.531秒,比最快的脚本语言6.687秒快12.59倍
2)pl/pgsql > pl/python3u > pllua

*/

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant