关于重复的数据,我们有一个有趣的问题。我们的源文件可以包含单个员工福利的多行数据。例如,医疗福利可以分成3行报告(3行只是一个例子-可能更多)。
数据被加载到暂存中,没有任何问题,并且将包含单个和多个受益行的混合。到目前为止,我所做的是执行聚合,以找到与多个好处( employeeid和benefitid上的聚合)的标识,如果它> 1,则将其移动到一个单独的暂存表中。
我现在需要做的是分析具有多种好处的分期表,并根据employeeid和benefitid汇总数据。让我们假设一个好处有3行。
员工和benefitid将是识别这些行的关键。其他行可能包括福利提供者、年度成本、合格和福利名称(以及更多的行,但这些行涵盖了输出单个行所需的4种聚合类型)。
在将数据聚合后(应用上述规则,将3行合并为1 ),然后将其与单行福利表合并。
发布于 2016-01-13 16:56:59
由于您使用的是2012+,所以您可以使用窗口函数,我认为这使得解决这个问题更容易。
这是我的装备。
IF NOT EXISTS
(
SELECT *
FROM
sys.tables AS T
INNER JOIN
sys.schemas AS S
ON s.schema_id = t.schema_id
WHERE
S.name ='dbo' AND T.name = 'StagingBenefit'
)
BEGIN
CREATE TABLE
dbo.StagingBenefit
(
StagingBenefitSK int identity(1,1) NOT NULL
, EmployeeID int NOT NULL
, BenefitID int NOT NULL
, BenefitProvider char(1) NOT NULL
, Cost int NOT NULL
, FlagA bit NOT NULL
, FlagB bit NOT NULL
, FlagC bit NOT NULL
, BenefitName varchar(30) NULL
, Scenario varchar(30) NOT NULL
);
END
TRUNCATE TABLE dbo.StagingBenefit;
INSERT INTO
dbo.StagingBenefit
SELECT
*
FROM
(
VALUES
(1,1, 'A', 100, 0, 0, 0, 'Teeth', 'SingleRow')
, (2,2, 'A', 200, 0, 0, 0, 'Teeth', 'DoubleRow, Samesies')
, (2,2, 'A', 222, 0, 0, 0, 'Teeth', 'DoubleRow, Samesies')
, (3,3, 'A', 300, 0, 0, 0, 'Teeth', 'ThreeRow,DoubleFlagged')
, (3,3, 'A', 330, 1, 0, 0, 'Elbow', 'ThreeRow,DoubleFlagged')
, (3,3, 'A', 333, 0, 1, 0, 'Elbow', 'ThreeRow,DoubleFlagged')
, (4,4, 'A', 400, 0, 0, 0, 'Teeth', 'MultipleProviders')
, (4,4, 'A', 440, 0, 0, 0, 'First', 'MultipleProviders')
, (4,4, 'B', 444, 0, 0, 1, NULL, 'MultipleProviders')
)D
(
EmployeeID
, BenefitID
, BenefitProvider
, Cost
, FlagA
, FlagB
, FlagC
, BenefitName
, Scenario
)
;然后,我会查看一个类似于这样的查询
SELECT DISTINCT
SB.EmployeeID
, SB.BenefitID
,
CASE
WHEN
MIN(SB.BenefitProvider) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID)
<> MAX(SB.BenefitProvider) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID)
THEN 'Multiple'
ELSE
MIN(SB.BenefitProvider) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID)
END AS BenefitProvider
-- SUM this
, SUM(SB.Cost) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID) As TotalCost
, CAST(MAX(CAST(SB.FlagA AS int)) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID) AS bit) AS FlagA
, CAST(MAX(CAST(SB.FlagB AS int)) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID) AS bit) AS FlagB
, CAST(MAX(CAST(SB.FlagC AS int)) OVER (PARTITION BY SB.EmployeeID, SB.BenefitID) AS bit) AS FlagC
, MIN(SB.BenefitName) OVER
(
PARTITION BY SB.EmployeeID, SB.BenefitID
) AS FirstBenefit
, SB.Scenario
FROM
dbo.StagingBenefit AS SB浏览查询的一些技巧
https://dba.stackexchange.com/questions/126129
复制相似问题