我需要创建一个包含三个列(sprint_name、begin_date、end_date)的hive表。我已经填充了sprint_name列,基于此,我希望在两个星期的增量中填充其他两个列。基本上,短跑持续两周。如果不能使用hive,请帮助我了解如何在sql server中创建它。
Sprint_name Begin_date End_date
Sprint 1.1 METADATA FYE20 2/6/2019 2/19/2019
Sprint 1.2 METADATA FYE20 2/20/2019 3/5/2019
Sprint 1.3 METADATA FYE20 3/6/2019 3/19/2019
Sprint 1.4 METADATA FYE20
Sprint 1.5 METADATA FYE20
Sprint 1.6 METADATA FYE20
Sprint 1.6 METADATA SALE FYE20
Sprint 1.7 METADATA FYE20
Sprint 2.1 METADATA FYE20
Sprint 2.2 METADATA FYE20
Sprint 2.3 METADATA FYE20
Sprint 2.3 METADATA SALE FYE20
Sprint 2.3 METADATA DOWN FYE20
Sprint 2.4 METADATA FYE20
Sprint 2.5 METADATA FYE20
Sprint 2.6 METADATA FYE20 7/10/2019 7/23/2019
Sprint 2.7 SALE FYE20 7/24/2019 8/6/2019
Sprint 2.7 METADATA FYE20 7/24/2019 8/6/2019
Sprint 3.1 METADATA FYE20
.
.
.
Sprint 4.6 METADATA FYE20
.
.
Sprint 1.1 METADATA FYE21发布于 2019-08-06 16:14:07
还有一个技巧,就是使用CTE更新表格,并在蜂巢中使用Posexplode。
CREATE TABLE IF NOT EXISTS db.test_date
(
userid int
,sprint_name string
,start_date date
,end_date date
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS orc
;在开始日期和结束日期中插入第一条记录,其余条目的保留日期为null。
INSERT INTO TABLE db.test_date VALUES
(1,'Sprint 1.1 METADATA FYE20','2019-02-06','2019-02-19' ),
(2,'Sprint 1.2 METADATA FYE20',NULL,NULL),
(3,'Sprint 1.3 METADATA FYE20',NULL,NULL ),
(4,'Sprint 1.4 METADATA FYE20',NULL,NULL),
(5,'Sprint 1.5 METADATA FYE20',NULL,NULL),
(6,'Sprint 1.6 METADATA FYE20',NULL,NULL),
(7,'Sprint 1.6 METADATA SALE ',NULL,NULL),
(8,'Sprint 1.7 METADATA FYE20',NULL,NULL),
(9,'Sprint 2.1 METADATA FYE20',NULL,NULL),
(10,'Sprint 2.2 METADATA FYE20',NULL,NULL);查询:
with
CTE AS (
select date_add(start_date,((pe.i)* 14)) as start_date,date_add(end_date,((pe.i)*14)) as end_date,pe.i+1 as userid
from db.test_date
lateral view
posexplode(split(space(10-1),' ')) pe as i,x
where start_date is not null
)
insert overwrite table db.test_date
select
t.Userid
,t.sprint_name
,d.start_date
,d.end_date
FROM db.test_date t
CROSS JOIN CTE d
on d.userid=t.userid;
注意:我想使用posexplode(split(space(max(userid)-min(userid),‘’) pe作为i,x,但是max & min不支持。
结果:
hive> select * from db.test_date;
OK
1 Sprint 1.1 METADATA FYE20 2019-02-06 2019-02-19
2 Sprint 1.2 METADATA FYE20 2019-02-20 2019-03-05
3 Sprint 1.3 METADATA FYE20 2019-03-06 2019-03-19
4 Sprint 1.4 METADATA FYE20 2019-03-20 2019-04-02
5 Sprint 1.5 METADATA FYE20 2019-04-03 2019-04-16
6 Sprint 1.6 METADATA FYE20 2019-04-17 2019-04-30
7 Sprint 1.6 METADATA SALE 2019-05-01 2019-05-14
8 Sprint 1.7 METADATA FYE20 2019-05-15 2019-05-28
9 Sprint 2.1 METADATA FYE20 2019-05-29 2019-06-11
10 Sprint 2.2 METADATA FYE20 2019-06-12 2019-06-25发布于 2019-08-05 06:17:52
在Server中,假设第一条记录有begin_date和end_date值,则可以使用window函数更新下一行的日期值。
CREATE TABLE Table1
(
Id Int Identity(1,1),Sprint_name varchar(max),Begin_date date, end_date date
)
INSERT INTO Table1 Values
('Sprint 1.1 METADATA FYE20' , '2/6/2019' , '2/19/2019' )
,('Sprint 1.2 METADATA FYE20' , NULL, NULL)
,('Sprint 1.3 METADATA FYE20',NULL,NULL)--insert all sprint_name values and NULL in begin_Date,end_date columns
;with cte as(
select
Id,
sprint_name,
ISNULL(dateadd(day,((id-1)*14),first_value(begin_Date) over (order by id rows unbounded preceding)),begin_date) as begin_date,
ISNULL(dateadd(day,((id-1)*14),first_value(end_date) over (order by id rows unbounded preceding)),end_date) as end_date
from Table1
)
update T
set T.Begin_Date=C.Begin_Date,
T.End_Date = C.End_Date
from Table1 T
inner join CTE C
on C.Id = T.Id;发布于 2019-08-05 08:13:50
如果你想在蜂巢里做这件事的话,在下面添加答案。
CREATE TABLE IF NOT EXISTS test_dev_db.test_date
(
Userid int
,sprint_name string
,start_date date
,end_date date
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS orc
;仅作为一个示例加载四个记录,就可以加载所有记录。
INSERT INTO TABLE test_dev_db.test_date VALUES
(1,'Sprint 1.1','2019-02-06','2019-02-19' ),
(2,'Sprint 1.2',NULL,NULL),
(3,'Sprint 1.3',NULL,NULL ),
(4,'Sprint 1.4',NULL,NULL);
with date_range as(
select
Userid
,sprint_name
,date_add(first_value(start_date) over (order by Userid rows unbounded preceding),((Userid-1)*14)) as start_date
,date_add(first_value(end_date) over (order by Userid rows unbounded preceding),((Userid-1)*14)) as end_date
from test_dev_db.test_date
)
insert overwrite table test_dev_db.test_date
select date_range.Userid,date_range.sprint_name
,date_range.start_date
,date_range.end_date from date_range;
hive> select * from test_dev_db.test_date;
OK
1 Sprint 1.1 2019-02-06 2019-02-19
2 Sprint 1.2 2019-02-20 2019-03-05
3 Sprint 1.3 2019-03-06 2019-03-19
4 Sprint 1.4 2019-03-20 2019-04-02
Time taken: 0.206 seconds, Fetched: 4 row(s)https://stackoverflow.com/questions/57351053
复制相似问题