在使用COPY INTO @mystage时,是否有开箱即用的方法让雪花将列中的值作为文件名使用?其目标是将X个文件复制到s3阶段(基本上是由column1进行分区),但直接复制到阶段,而不是创建子文件夹。X是列中不同值的数目。
这显然可以手动完成:
copy into @mystage/mycustomfilename
然而,更好的选择是这样的:
copy into @mystage/$column1
有雪花支持的版本吗?
如上所述,PARTITION BY设置将数据解析为子文件夹,子文件夹使用指定列中的值命名,但雪花仍然在每个子文件夹中使用通用文件名。
发布于 2022-03-29 22:33:43
创造的结构-
create temporary table temp_tab_split_members(seq_id number, member_id number, name varchar2(30));
+----------------------------------------------------+
| status |
|----------------------------------------------------|
| Table TEMP_TAB_SPLIT_MEMBERS successfully created. |
+----------------------------------------------------+假数据-
insert into temp_tab_split_members
with cte as
(select seq4(),(trim(mod(seq4(),4))+1)::integer,'my name-'||seq4() from table(generator(rowcount=>12)))
select * from cte;
+-------------------------+
| number of rows inserted |
|-------------------------|
| 12 |
+-------------------------+检查数据格式-
select * from TEMP_TAB_SPLIT_MEMBERS order by member_id;
+--------+-----------+------------+
| SEQ_ID | MEMBER_ID | NAME |
|--------+-----------+------------|
| 0 | 1 | my name-0 |
| 4 | 1 | my name-4 |
| 8 | 1 | my name-8 |
| 1 | 2 | my name-1 |
| 5 | 2 | my name-5 |
| 9 | 2 | my name-9 |
| 2 | 3 | my name-2 |
| 6 | 3 | my name-6 |
| 10 | 3 | my name-10 |
| 3 | 4 | my name-3 |
| 7 | 4 | my name-7 |
| 11 | 4 | my name-11 |
+--------+-----------+------------+检查阶段为空
list @test_row_stage;
+------+------+-----+---------------+
| name | size | md5 | last_modified |
|------+------+-----+---------------|
+------+------+-----+---------------+生成文件的主要过程
EXECUTE IMMEDIATE $$
DECLARE
company varchar2(30);
BU varchar2(30);
eval_desc varchar2(30);
member_id varchar2(30);
file_name varchar2(30);
c1 CURSOR FOR SELECT distinct member_id FROM temp_tab_split_members;
BEGIN
for record in c1 do
member_id:=record.member_id;
file_name:='load'||'_'||member_id||'.csv';
execute immediate 'copy into @test_row_stage/'||:file_name||' from
(select * from temp_tab_split_members where member_id='||:member_id||') overwrite=false';
end for;
RETURN 0;
END;
$$
;
+-----------------+
| anonymous block |
|-----------------|
| 0 |
+-----------------+检查过程执行后的阶段内容
list @test_row_stage; -- output truncated columnwise
+----------------------------------------+------+
| name | size |
|----------------------------------------+------+
| test_row_stage/load_1.csv_0_0_0.csv.gz | 48 |
| test_row_stage/load_2.csv_0_0_0.csv.gz | 48 |
| test_row_stage/load_3.csv_0_0_0.csv.gz | 48 |
| test_row_stage/load_4.csv_0_0_0.csv.gz | 48 |文件内容交叉核对
select $1,$2,$3 from @test_row_stage/load_1.csv_0_0_0.csv.gz union
select $1,$2,$3 from @test_row_stage/load_2.csv_0_0_0.csv.gz union
select $1,$2,$3 from @test_row_stage/load_3.csv_0_0_0.csv.gz union
select $1,$2,$3 from @test_row_stage/load_4.csv_0_0_0.csv.gz;
+----+----+------------+
| $1 | $2 | $3 |
|----+----+------------|
| 0 | 1 | my name-0 |
| 4 | 1 | my name-4 |
| 8 | 1 | my name-8 |
| 1 | 2 | my name-1 |
| 5 | 2 | my name-5 |
| 9 | 2 | my name-9 |
| 2 | 3 | my name-2 |
| 6 | 3 | my name-6 |
| 10 | 3 | my name-10 |
| 3 | 4 | my name-3 |
| 7 | 4 | my name-7 |
| 11 | 4 | my name-11 |
+----+----+------------+发布于 2022-03-28 22:00:27
据我所知,这方面没有OOB,但您可以编写自定义代码和获取值,并使用它们命名文件并将其复制到stage/s3。请参阅下面类似的内容-
EXECUTE IMMEDIATE $$
DECLARE
company varchar2(30);
BU varchar2(30);
eval_desc varchar2(30);
member_id varchar2(30);
file_name varchar2(30);
c1 CURSOR FOR SELECT * FROM test_pivot;
BEGIN
for record in c1 do
company:=record.company;
BU:=record.BU;
eval_desc:=record.eval_desc;
member_id:=record.member_id;
file_name:='load'||'_'||member_id||'.csv';
create or replace temporary table temp_test_pvt(company varchar2(30),BU varchar2(30),eval_desc varchar2(30),member_id varchar2(30));
insert into temp_test_pvt values (:company,:bu,:eval_desc,:member_id);
execute immediate 'copy into @test_row_stage/'||:file_name||' from (select * from temp_test_pvt) overwrite=false';
end for;
RETURN 0;
END;
$$
;另外,请参考这里的类似帖子- Copy JSON data from Snowflake into S3
https://stackoverflow.com/questions/71654138
复制相似问题