我正在创建一个存储过程,它在一个表上循环(有数千行),对于每一行都有一个CLOB列,我想从该列中获取正则表达式(sa "FNR")上的所有匹配。此后,我希望在一个新表中插入每个不同的匹配。单个CLOB列可能包含数千个匹配项,但通常在CLOB中重复的是相同的"FNR“--也就是说,不同的正则匹配要少得多,这是我唯一感兴趣的匹配。然而,我所做的这个过程花费了相当长的时间,我怀疑在所有的比赛中循环是最耗时的部分。
我的手术是这样的:
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR AS
BEGIN
DECLARE
v_n NUMBER;
v_cnt NUMBER;
v_mtrid NUMBER;
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_doc CLOB;
v_fnr VARCHAR2(11);
BEGIN
-- Get all rows from table --
SELECT COUNT(*) INTO v_n FROM TABLE;
IF v_n > 0 THEN
-- Loop over all rows --
FOR i IN 1..v_n LOOP
SELECT doc, mtrid
INTO v_doc, v_mtrid
FROM (SELECT DOC doc, ID mtrid, ROWNUM rnum
FROM TABLE
WHERE ROWNUM <=i)
WHERE rnum >= i;
IF v_doc IS NOT NULL THEN
SELECT REGEXP_COUNT(v_doc, v_regex_fnr) INTO v_cnt FROM DUAL;
IF v_cnt >= 1 THEN
-- For each regex match - time consuming, right? --
FOR j IN 1..v_cnt LOOP
SELECT REGEXP_SUBSTR(v_doc, v_regex_fnr, 1, j, 'm') INTO v_fnr FROM DUAL;
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR)
SELECT v_mtrid, v_fnr FROM DUAL;
END IF;
END LOOP;
END IF;
END IF;
COMMIT;
END LOOP;
END IF;
END LOOP;
EXCEPTION WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;/
有谁知道如何优化这个程序吗?
我正在使用Oracle 11.2.0.3.0。(顺便说一句,我知道ctx_entity-包,但是在这个版本中它是禁用的。不过,我仍在考虑启用它。)
更新
在应用了nop77svk给出的非常有用的性能优化技术之后,我可以肯定地说,regexp_substr()在CLOB上是瓶颈,因为不幸的是没有性能改进。然而,我想出了一个“黑客/解决方案”,将regexp_substr()调用的数量降到最低,从而极大地提高了性能。首先,我想做一个增量式的“训练”regex,不包括以前的比赛,但由于Oracle不支持消极的前瞻性,这是行不通的。最后,我保存了CLOB,并使用regexp_replace()删除了所有匹配项。由于CLOB中出现了许多相同的情况,这避免了大量regexp_substr()调用的过程,并同时处理了不同的需求。
下面是我的结果,基于nop77svk的贡献。是的,我回到了在MERGE-statement中使用MERGE-statement,但是这里有什么方法可以绕过它吗?
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR2 AS
BEGIN
DECLARE
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_fnr VARCHAR2(11);
v_doc CLOB;
type rec_table2 is record (
mtr_id table2.mtr_id%type,
fnr table2.fnr%type
);
type arr_table2 is table of rec_table2 index by simple_integer;
table2_bulk arr_table2;
table2_row rec_table2;
BEGIN
FOR rec IN (
select doc, MTR_ID as mtrid
from TABLE
where DOC is not null
) LOOP
v_doc := rec.doc;
loop
v_fnr := REGEXP_SUBSTR(v_doc, v_regex_fnr, 1, 1, 'm');
exit when v_fnr is null;
v_vedlegg := REGEXP_REPLACE(v_doc, v_fnr , '' , 1 , 0); -- Incrementally remove all occurences of match from doc --
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
table2_row.mtr_id := rec.mtrid;
table2_row.fnr := v_fnr;
table2_bulk(table2_bulk.count+1) := table2_row;
END IF;
END LOOP;
END LOOP;
forall i in indices of table2_bulk
MERGE INTO TABLE2 T
USING (SELECT table2_bulk(i).mtr_id mtrid, table2_bulk(i).fnr fnr FROM DUAL) B
ON (T.MTR_ID = B.mtrid AND T.FNR = B.fnr)
WHEN NOT MATCHED THEN INSERT (T.MTR_ID, T.FNR)
VALUES (B.mtrid, B.fnr);
COMMIT;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;
END;
/发布于 2014-11-04 19:12:21
迭代地调优PL/SQL块。
迭代0:修复语法错误..。
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR AS
v_n NUMBER;
v_cnt NUMBER;
v_mtrid NUMBER;
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_doc CLOB;
v_fnr VARCHAR2(11);
BEGIN
-- Get all rows from table --
SELECT COUNT(*) INTO v_n FROM TABLE;
IF v_n > 0 THEN
-- Loop over all rows --
FOR i IN 1..v_n LOOP
SELECT doc, mtrid
INTO v_doc, v_mtrid
FROM (SELECT DOC doc, ID mtrid, ROWNUM rnum
FROM TABLE
WHERE ROWNUM <=i)
WHERE rnum >= i;
IF v_doc IS NOT NULL THEN
SELECT REGEXP_COUNT(v_doc, v_regex_fnr) INTO v_cnt FROM DUAL;
IF v_cnt >= 1 THEN
-- For each regex match - time consuming, right? --
FOR j IN 1..v_cnt LOOP
SELECT REGEXP_SUBSTR(v_doc, v_regex_fnr, 1, j, 'm') INTO v_fnr FROM DUAL;
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR)
SELECT v_mtrid, v_fnr FROM DUAL;
END IF;
END LOOP;
END IF;
END IF;
COMMIT;
END LOOP;
END IF;
EXCEPTION WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;迭代1:删除不必要的上下文切换和无用的行计数.
DECLARE
v_cnt NUMBER;
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_fnr VARCHAR2(11);
BEGIN
FOR rec IN (
select doc, id as mtrid
from table
) LOOP
IF rec.doc IS NOT NULL THEN
v_cnt := REGEXP_COUNT(rec.doc, v_regex_fnr);
IF v_cnt >= 1 THEN
-- For each regex match - time consuming, right? --
FOR j IN 1..v_cnt LOOP
v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR) values (rec.mtrid, v_fnr);
END IF;
END LOOP;
END IF;
END IF;
END LOOP;
COMMIT;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;
/迭代2:减少外循环数
DECLARE
v_cnt NUMBER;
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_fnr VARCHAR2(11);
BEGIN
FOR rec IN (
select doc, id as mtrid
from table
where doc is not null
) LOOP
v_cnt := REGEXP_COUNT(rec.doc, v_regex_fnr);
IF v_cnt >= 1 THEN
-- For each regex match - time consuming, right? --
FOR j IN 1..v_cnt LOOP
v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR) values (rec.mtrid, v_fnr);
END IF;
END LOOP;
END IF;
END LOOP;
COMMIT;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;
/迭代3:缩短了迭代2的代码。
DECLARE
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_fnr VARCHAR2(11);
BEGIN
FOR rec IN (
select doc, id as mtrid, REGEXP_COUNT(rec.doc, v_regex_fnr) as regexp_cnt
from table
where doc is not null
and regexp_like(doc, v_regex_fnt)
) LOOP
FOR j IN 1..rec.regexp_cnt LOOP
v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR) values (rec.mtrid, v_fnr);
END IF;
END LOOP;
END LOOP;
COMMIT;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;
/迭代4:删除不必要的regexp_count()计数..。
DECLARE
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_fnr VARCHAR2(11);
j integer;
BEGIN
FOR rec IN (
select doc, id as mtrid
from table
where doc is not null
) LOOP
j := 1;
loop
v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
exit when v_fnt is null;
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR) values (rec.mtrid, v_fnr);
END IF;
j := j + 1;
END LOOP;
END LOOP;
COMMIT;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;
/迭代5:将结果保存到内存并立即将其刷新到DB (使用集合绑定),再加上处理不同的需求.
create or replace type obj_table2
as
object (
mtr_id integer,
fnr varchar2(4000)
);
/
create or replace type arr_table2
as
table of obj_table2;
/
DECLARE
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_fnr VARCHAR2(11);
j integer;
table2_bulk arr_table2 := arr_table2();
BEGIN
FOR rec IN (
select doc, id as mtrid
from table
where doc is not null
) LOOP
j := 1;
loop
v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
exit when v_fnt is null;
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
table2_bulk.extend();
table2_bulk(table2_bulk.last) := new obj_table2(
mtr_id => rec.mtrid,
fnr => v_fnr
);
END IF;
j := j + 1;
END LOOP;
END LOOP;
insert into table2(mtr_id, fnr)
select mtr_id, fnr
from table(table2_bulk) X
minus
select mtr_id, fnr
from table2;
COMMIT;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;
/迭代6:在决定使用令人发指地炫耀 .
insert into table2 (mtr_id, fnr)
with xyz (doc, mtrid, fnr, j) as (
select doc, id as mtrid, cast(null as varchar2(4000)) as fnr, 0 as j
from table A
where doc is not null
--
union all
--
select doc, mtrid,
regexp_substr(doc, '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))', 1, j+1, 'm') as fnr,
j+1
from xyz X
where j = 0
or j > 0 and X.fnr is not null
)
select distinct mtrid, fnr
from xyz
where j > 0
and fnr is not null
and CHECK_FNR(fnr) = 'TRUE'
;
commit;--请注意,,这些代码段甚至可能无法工作。由于您没有为我们提供任何测试数据设置,所以我们只能以假设的方式调优您的代码。
请注意,其中最慢的部分仍然是CLOB值上的regexp_substr()。您可能需要考虑使用regexp_substr()的regexp_substr()参数而不是occurence参数来获得后续的regexp匹配。
好好享受吧。
发布于 2014-11-04 17:06:29
使用静态调用删除DUAL引用。这样就可以在PL/SQL和SQL引擎之间节省不必要的context switching。
我还添加了隐式游标到逐条记录的处理中。
下一个改进级别可以是将散装插入放到另一个表中。虽然不是我在这里做的。
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR AS
BEGIN
DECLARE
v_n NUMBER;
v_cnt NUMBER;
v_mtrid NUMBER;
v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
v_doc CLOB;
v_fnr VARCHAR2(11);
BEGIN
-- Get all rows from table --
/* Lets go with a Implicit cursor */
FOR MYREC IN (SELECT DOC doc, ID mtrid
FROM TABLE)
LOOP
IF MYREC.DOC IS NOT NULL THEN
v_cnt := REGEXP_COUNT(MYREC.DOC, v_regex_fnr);
IF v_cnt >= 1 THEN
-- For each regex match - time consuming, right? --
FOR j IN 1..v_cnt LOOP
v_fnr := REGEXP_SUBSTR(MYREC.DOC, v_regex_fnr, 1, j, 'm');
IF CHECK_FNR(v_fnr) = 'TRUE' THEN
INSERT INTO TABLE2(MTR_ID, FNR)
VALUES (MYREC.MTRID,v_fnr);
END IF;
END LOOP;
END IF;
COMMIT;
END IF;
END LOOP;
EXCEPTION WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error - rollback');
DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
ROLLBACK;
END;https://stackoverflow.com/questions/26740267
复制相似问题