首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >PostgreSQL函数优化

PostgreSQL函数优化
EN

Stack Overflow用户
提问于 2018-01-25 17:14:16
回答 1查看 99关注 0票数 0

我在JSON专栏中获得了一些来自外部提供商的车辆信息,其想法是将这些信息与我们的信息关联起来,这样我们就可以将我们自己的车辆映射到这个外部车辆上。我们使用"diff“值来评估关系,以便了解最佳映射和最差映射。

问题:函数运行缓慢。这可能需要2-3秒。简而言之,它在vehicles表中搜索(有几十万行),检索许多候选车辆(例如+300),并执行"diff“函数对每个候选车辆进行评估(考虑到属性的权重)。

我需要增强这个函数及其内部函数的性能。

目前,我几乎可以肯定,优化以下两个函数就足够了:

代码语言:javascript
复制
-- JSON DIFF
CREATE OR REPLACE FUNCTION public.json_diff(
l jsonb,
r jsonb)
RETURNS real AS
$BODY$
DECLARE
resultado real := 0.0;
peso real := 1.0;
suma_pesos real := 0.0;
atr_l text;
atr_r text;
sigma real;
diff_atr real;

arrElement1 text;

BEGIN
FOR atr_l IN 
    SELECT key FROM jsonb_each(l)
LOOP
    IF ( (SELECT "Valor" FROM config."PesosAtributos" WHERE "Clave" LIKE atr_l) > 0 ) THEN
        suma_pesos := suma_pesos + (SELECT "Valor" FROM config."PesosAtributos" WHERE "Clave" LIKE atr_l);
    ELSE
        suma_pesos := suma_pesos + 0.1;
    END IF;
END LOOP;

--RAISE NOTICE '%', suma_pesos;

IF (l IS NULL OR l = '{}' OR r IS NULL OR r = '{}')
    THEN resultado := 1.0;
        --RAISE NOTICE 'NULL or empty';
ELSE
    FOR atr_l IN 
        SELECT key FROM jsonb_each(l)
    LOOP
        IF ( (SELECT COUNT("Valor") FROM config."PesosAtributos" WHERE "Clave" LIKE atr_l) = 1 ) THEN
            peso := (SELECT "Valor" FROM config."PesosAtributos" WHERE "Clave" LIKE atr_l);
            sigma := (SELECT "Sigma" FROM config."PesosAtributos" WHERE "Clave" LIKE atr_l);                
        ELSE
            peso := 0.1;
            sigma := 0.5;
        END IF;         

        IF ( (SELECT COUNT(*) FROM jsonb_each(r) WHERE key = atr_l) = 0)
            THEN resultado := resultado + peso;
                --RAISE NOTICE '% doesn''t exist in right JSON. ', atr_l;
        ELSE
            IF (is_numeric(l ->> atr_l) AND is_numeric(r ->> atr_l)) THEN
                diff_atr := peso * diff_numeric((l ->> atr_l)::real, (r ->> atr_l)::real, sigma);
                --RAISE NOTICE  '%: %, %. Peso: %. Sigma: %. Diff: %.', atr_l, (l ->> atr_l), (r ->> atr_l), peso, sigma, diff_atr;
            ELSIF (l ->> atr_l LIKE '[%]') THEN
                IF (r ->> atr_l LIKE '[%]') THEN
                --RAISE NOTICE 'LEFT ARRAY AND RIGHT ARRAY';
                diff_atr := peso * 1.0;
                FOR arrElement1 IN
                    SELECT jsonb_array_elements_text(l -> atr_l)
                LOOP
                    --RAISE NOTICE '%', arrElement1;
                    IF (arrElement1 IN (SELECT jsonb_array_elements_text(r -> atr_l))) THEN
                        diff_atr := peso * 0;
                        EXIT;
                    END IF;
                END LOOP;
                ELSE
                IF (r -> atr_l IN (jsonb_array_elements_text(l -> atr_l))) THEN
                    diff_atr := peso * 0;
                ELSE 
                    diff_atr := peso * 1.0;
                END IF;
                END IF;
            ELSIF (r ->> atr_l LIKE '[%]') THEN

                IF (l -> atr_l IN (jsonb_array_elements_text(r -> atr_l))) THEN
                diff_atr := peso * 0;
                ELSE 
                diff_atr := peso * 1.0;
                END IF;
            ELSE
                diff_atr := peso * ( 1 - similarity(l ->> atr_l, r ->> atr_l)::real );  
                --RAISE NOTICE  '%: %, %. Peso: %. Diff: %.', atr_l, (l ->> atr_l), (r ->> atr_l), peso, diff_atr;      
            END IF;

            resultado := resultado + diff_atr;              
        END IF;

    END LOOP;
END IF;
--RAISE NOTICE 'RESULTADO FINAL: %. SUMA PESOS: %', resultado, suma_pesos;
RETURN resultado / suma_pesos;
END;
$BODY$
  LANGUAGE plpgsql STABLE
  COST 100;
ALTER FUNCTION public.json_diff(jsonb, jsonb)
OWNER TO postgres;


-- SEARCH FOR VEHICLES
CREATE OR REPLACE FUNCTION public.generate_similar_versions_with_base_entity(
tipo integer,
id integer,
top integer)
RETURNS TABLE("Diff" real, "Id" integer, "CreatedAt" bigint, "ModifiedAt" bigint, "IdVersionVehiculo" integer, "IdVersionVehiculoProveedor" integer, "Codigo" text) 
LANGUAGE 'plpgsql'

COST 100
VOLATILE
AS $BODY$

DECLARE 
marca text;
modelo text;
anyo_ini int;
anyo_fin int;
anyo_fabr int;
anyo_matr int;
modelo_similar text;

BEGIN
SELECT vvp."DatosSideoJson" ->> 'Marca',
vvp."DatosSideoJson" ->> 'Modelo',
vvp."DatosSideoJson" ->> 'AnyoInicio',
vvp."DatosSideoJson" ->> 'AnyoFin',
vvp."DatosSideoJson" ->> 'AnyoFabricacion',
vvp."DatosSideoJson" ->> 'AnyoMatriculacion'
INTO marca, modelo, anyo_ini, anyo_fin, anyo_fabr, anyo_matr
FROM public."VersionesVehiculoProveedor" AS vvp WHERE vvp."Id" = id;

IF (anyo_fin IS NULL) THEN anyo_fin = 0; END IF;
IF (top = -1) THEN top = NULL; END IF;

SELECT get_similar_model_group(marca, modelo) INTO modelo_similar;

IF modelo_similar IS NOT NULL THEN

    DELETE FROM public."VinsToVersionVehiculo" WHERE public."VinsToVersionVehiculo"."IdVersionVehiculoProveedor" = id;

    INSERT INTO public."VinsToVersionVehiculo" (
    "Diff",
    "IdVersionVehiculoProveedor",
    "IdVersionVehiculo",
    "CreatedAt",
    "ModifiedAt"
    )
    SELECT json_diff((SELECT vvp."DatosSideoJson" FROM public."VersionesVehiculoProveedor" AS vvp WHERE vvp."Id" = id ), j2.valores2),
    id,
    j2."IdVersion",
    0::bigint,
    0::bigint
    FROM (
    SELECT versiones."ValoresAtributosJSONB" AS valores2, versiones."Id" AS "IdVersion", marcas."Nombre" AS "Marca", modelos."Nombre" AS "Modelo", 
        modelos."Grupo" AS "Grupo", modelos."Subgrupo" AS "Subgrupo", versiones."AnyoInicio" AS "AnyoInicio", versiones."AnyoFin" AS "AnyoFin",
        mapeos."Codigo" AS "Codigo"
    FROM public."VersionesVehiculo" AS versiones
    INNER JOIN public."MapeosVersionVehiculoProveedor" AS mapeos ON versiones."Id" = mapeos."IdVersionVehiculo"
    INNER JOIN public."Modelos" AS modelos ON versiones."IdModelo" = modelos."Id"
    INNER JOIN public."Marcas" AS marcas ON modelos."IdMarca" = marcas."Id"
    INNER JOIN public."TiposVehiculo" AS tipos ON marcas."IdTipoVehiculo" = tipos."Id"
    WHERE modelos."Grupo" LIKE modelo_similar -- (modelos."AnyoInicio" <= anyo AND (modelos."AnyoFin" = 0 OR anyo <= modelos."AnyoFin"))
        AND (
            (anyo_fabr IS NOT NULL AND anyo_fabr > 0 AND year_is_in_range(anyo_fabr, modelos."AnyoInicio", modelos."AnyoFin") > 0.2)
            OR (anyo_matr IS NOT NULL AND anyo_matr > 0 AND year_is_in_range(anyo_matr, modelos."AnyoInicio", modelos."AnyoFin") > 0.2)
            OR (compare_years_interval(modelos."AnyoInicio", modelos."AnyoFin", anyo_ini, anyo_fin, 2)) 
        )
        AND versiones."ValoresAtributosJSONB" IS NOT NULL       
    ) AS j2
    ORDER BY "Diff"
    LIMIT top;
END IF;

END

$BODY$;

ALTER FUNCTION public.generate_similar_versions_with_base_entity(integer, integer, integer)
OWNER TO postgres;

重函数json_diff的执行计划:

代码语言:javascript
复制
[
{
"Execution Time": 1004.942,
"Planning Time": 2.85,
"Plan": {
  "Inner Unique": false,
  "Startup Cost": 2.88,
  "Actual Rows": 305,
  "Plans": [
    {
      "Filter": "(\"Id\" = 6008)",
      "Startup Cost": 0,
      "Actual Rows": 1,
      "Node Type": "Seq Scan",
      "Plan Rows": 1,
      "Relation Name": "VersionesVehiculoProveedor",
      "Alias": "vvp",
      "Parallel Aware": false,
      "Actual Total Time": 0.008,
      "Parent Relationship": "InitPlan",
      "Actual Startup Time": 0.006,
      "Plan Width": 341,
      "Subplan Name": "InitPlan 1 (returns $0)",
      "Actual Loops": 1,
      "Rows Removed by Filter": 2,
      "Total Cost": 2.04
    },
    {
      "Inner Unique": false,
      "Startup Cost": 0.42,
      "Actual Rows": 298,
      "Plans": [
        {
          "Inner Unique": true,
          "Startup Cost": 0,
          "Join Filter": "(marcas.\"IdTipoVehiculo\" = tipos.\"Id\")",
          "Actual Rows": 3,
          "Plans": [
            {
              "Inner Unique": false,
              "Startup Cost": 0,
              "Join Filter": "(modelos.\"IdMarca\" = marcas.\"Id\")",
              "Actual Rows": 3,
              "Plans": [
                {
                  "Startup Cost": 0,
                  "Actual Rows": 77,
                  "Node Type": "Seq Scan",
                  "Plan Rows": 77,
                  "Relation Name": "Marcas",
                  "Alias": "marcas",
                  "Parallel Aware": false,
                  "Actual Total Time": 0.024,
                  "Parent Relationship": "Outer",
                  "Actual Startup Time": 0.018,
                  "Plan Width": 8,
                  "Actual Loops": 1,
                  "Total Cost": 1.77
                },
                {
                  "Startup Cost": 0,
                  "Actual Rows": 3,
                  "Plans": [
                    {
                      "Filter": "((\"Grupo\" ~~ 'Fiesta'::text) AND compare_years_interval(\"AnyoInicio\", \"AnyoFin\", 2003, 2008, 2))",
                      "Startup Cost": 0,
                      "Actual Rows": 3,
                      "Node Type": "Seq Scan",
                      "Plan Rows": 3,
                      "Relation Name": "Modelos",
                      "Alias": "modelos",
                      "Parallel Aware": false,
                      "Actual Total Time": 0.348,
                      "Parent Relationship": "Outer",
                      "Actual Startup Time": 0.141,
                      "Plan Width": 8,
                      "Actual Loops": 1,
                      "Rows Removed by Filter": 2505,
                      "Total Cost": 698.35
                    }
                  ],
                  "Node Type": "Materialize",
                  "Plan Rows": 3,
                  "Parallel Aware": false,
                  "Actual Total Time": 0.005,
                  "Parent Relationship": "Inner",
                  "Actual Startup Time": 0.002,
                  "Plan Width": 8,
                  "Actual Loops": 77,
                  "Total Cost": 698.37
                }
              ],
              "Node Type": "Nested Loop",
              "Plan Rows": 3,
              "Join Type": "Inner",
              "Rows Removed by Join Filter": 228,
              "Parallel Aware": false,
              "Actual Total Time": 0.437,
              "Parent Relationship": "Outer",
              "Actual Startup Time": 0.392,
              "Plan Width": 8,
              "Actual Loops": 1,
              "Total Cost": 703.59
            },
            {
              "Startup Cost": 0,
              "Actual Rows": 1,
              "Plans": [
                {
                  "Startup Cost": 0,
                  "Actual Rows": 1,
                  "Node Type": "Seq Scan",
                  "Plan Rows": 2,
                  "Relation Name": "TiposVehiculo",
                  "Alias": "tipos",
                  "Parallel Aware": false,
                  "Actual Total Time": 0.006,
                  "Parent Relationship": "Outer",
                  "Actual Startup Time": 0.006,
                  "Plan Width": 4,
                  "Actual Loops": 1,
                  "Total Cost": 1.02
                }
              ],
              "Node Type": "Materialize",
              "Plan Rows": 2,
              "Parallel Aware": false,
              "Actual Total Time": 0.003,
              "Parent Relationship": "Inner",
              "Actual Startup Time": 0.003,
              "Plan Width": 4,
              "Actual Loops": 3,
              "Total Cost": 1.03
            }
          ],
          "Node Type": "Nested Loop",
          "Plan Rows": 3,
          "Join Type": "Inner",
          "Rows Removed by Join Filter": 0,
          "Parallel Aware": false,
          "Actual Total Time": 0.452,
          "Parent Relationship": "Outer",
          "Actual Startup Time": 0.402,
          "Plan Width": 4,
          "Actual Loops": 1,
          "Total Cost": 704.69
        },
        {
          "Filter": "(\"ValoresAtributosJSONB\" IS NOT NULL)",
          "Startup Cost": 0.42,
          "Scan Direction": "Forward",
          "Plan Width": 598,
          "Rows Removed by Index Recheck": 0,
          "Actual Rows": 99,
          "Node Type": "Index Scan",
          "Index Cond": "(\"IdModelo\" = modelos.\"Id\")",
          "Plan Rows": 63,
          "Relation Name": "VersionesVehiculo",
          "Alias": "versiones",
          "Parallel Aware": false,
          "Actual Total Time": 0.344,
          "Parent Relationship": "Inner",
          "Actual Startup Time": 0.026,
          "Total Cost": 9.34,
          "Actual Loops": 3,
          "Rows Removed by Filter": 0,
          "Index Name": "\"IX_VersionesVehiculo_IdModelo\""
        }
      ],
      "Node Type": "Nested Loop",
      "Plan Rows": 169,
      "Join Type": "Inner",
      "Parallel Aware": false,
      "Actual Total Time": 1.646,
      "Parent Relationship": "Outer",
      "Actual Startup Time": 0.431,
      "Plan Width": 594,
      "Actual Loops": 1,
      "Total Cost": 734.61
    },
    {
      "Startup Cost": 0.42,
      "Scan Direction": "Forward",
      "Plan Width": 19,
      "Rows Removed by Index Recheck": 0,
      "Actual Rows": 1,
      "Node Type": "Index Scan",
      "Index Cond": "(\"IdVersionVehiculo\" = versiones.\"Id\")",
      "Plan Rows": 1,
      "Relation Name": "MapeosVersionVehiculoProveedor",
      "Alias": "mapeos",
      "Parallel Aware": false,
      "Actual Total Time": 0.012,
      "Parent Relationship": "Inner",
      "Actual Startup Time": 0.01,
      "Total Cost": 0.45,
      "Actual Loops": 298,
      "Index Name": "\"IX_MapeosVersionVehiculoProveedor_IdVersionVehiculo\""
    }
  ],
  "Node Type": "Nested Loop",
  "Plan Rows": 169,
  "Join Type": "Inner",
  "Parallel Aware": false,
  "Actual Total Time": 1004.734,
  "Actual Startup Time": 4.083,
  "Plan Width": 31,
  "Actual Loops": 1,
  "Total Cost": 856.96
},
"Triggers": []
}
]

谢谢。

EN

回答 1

Stack Overflow用户

发布于 2018-01-25 18:54:54

从您的计划中,我在两个表上看到了SeqScan:

代码语言:javascript
复制
INNER JOIN public."Modelos" AS modelos ON versiones."IdModelo" = modelos."Id"
INNER JOIN public."Marcas" AS marcas ON modelos."IdMarca" = marcas."Id"

我想

代码语言:javascript
复制
create index on public."Modelos"("Id")
create index on public."Marcas"("Id")

可能会加快行刑速度

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/48439261

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档