我使用的是ClickHouse,我想知道哪个“搜索词”在不同的日子里是不同的路径值
我的数据
date searchword path
1 2019-10-02 word1 url-1
2 2019-10-03 word1 url-2
3 2019-10-04 word1 url-1
4 2019-10-11 word1 url-1
5 2019-10-07 word2 url-4
6 2019-10-08 word3 url-5
7 2019-10-09 word3 url-5
8 2019-10-10 word3 url-5
9 2019-10-08 word4 url-5
10 2019-10-09 word4 url-5
11 2019-10-10 word4 url-5
12 2019-10-10 word4 url-6
13 2019-10-11 word4 url-5
14 2019-10-10 word5 url-7
15 2019-10-09 word5 url-8
16 2019-10-11 word5 url-7期望结果
word1
word5但不是word4,因为没有交换值,而是同一天的两个值
word1 (两条路径,每天一条,第2行的值是url2,其他的日子,第1行和第3行的值是url1 )
word5 (多个值,值为url-7的第14行和第15 url-8行).
我想我应该使用sequenceMatch或windowFunnel函数,知道吗?
发布于 2020-02-04 19:59:30
请尝试以下查询:
SELECT searchword
FROM (
/* intermediate result of SELECT below
┌─searchword─┬─date_paths_array──────────────────────────────────────────────────────────────────────────────────────────────┬─isExchanged─┐
│ word3 │ [('2019-10-09',['url-5']),('2019-10-10',['url-5']),('2019-10-08',['url-5'])] │ 0 │
│ word5 │ [('2019-10-10',['url-7']),('2019-10-11',['url-7']),('2019-10-09',['url-8'])] │ 1 │
│ word1 │ [('2019-10-04',['url-1']),('2019-10-02',['url-1']),('2019-10-03',['url-2']),('2019-10-11',['url-1'])] │ 1 │
│ word4 │ [('2019-10-08',['url-5']),('2019-10-10',['url-5','url-6']),('2019-10-09',['url-5']),('2019-10-11',['url-5'])] │ 0 │
└────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────┘
*/
SELECT
searchword,
groupArray(date_paths) date_paths_array,
arrayFirstIndex((x, index) ->
index != length(date_paths_array)
AND hasAny(x.2, date_paths_array[index + 1].2) = 0,
date_paths_array,
arrayEnumerate(date_paths_array)) > 0 isExchanged
FROM (
/* intermediate result of SELECT below
┌─searchword─┬─date_paths───────────────────────┐
│ word1 │ ('2019-10-04',['url-1']) │
│ word3 │ ('2019-10-09',['url-5']) │
│ word5 │ ('2019-10-10',['url-7']) │
│ word1 │ ('2019-10-02',['url-1']) │
│ word4 │ ('2019-10-08',['url-5']) │
│ word2 │ ('2019-10-07',['url-4']) │
│ word5 │ ('2019-10-11',['url-7']) │
│ word5 │ ('2019-10-09',['url-8']) │
│ word3 │ ('2019-10-10',['url-5']) │
│ word1 │ ('2019-10-03',['url-2']) │
│ word3 │ ('2019-10-08',['url-5']) │
│ word1 │ ('2019-10-11',['url-1']) │
│ word4 │ ('2019-10-10',['url-5','url-6']) │
│ word4 │ ('2019-10-09',['url-5']) │
│ word4 │ ('2019-10-11',['url-5']) │
└────────────┴──────────────────────────────────┘
*/
SELECT searchword, (date, groupArray(path)) date_paths
FROM (
/* test dataset */
SELECT d.1 order_num, toDate(d.2) date, d.3 searchword, d.4 path
FROM (
SELECT arrayJoin([
(1, '2019-10-02', 'word1', 'url-1'),
(2, '2019-10-03', 'word1', 'url-2'),
(3, '2019-10-04', 'word1', 'url-1'),
(4, '2019-10-11', 'word1', 'url-1'),
(5, '2019-10-07', 'word2', 'url-4'),
(6, '2019-10-08', 'word3', 'url-5'),
(7, '2019-10-09', 'word3', 'url-5'),
(8, '2019-10-10', 'word3', 'url-5'),
(9, '2019-10-08', 'word4', 'url-5'),
(10, '2019-10-09', 'word4', 'url-5'),
(11, '2019-10-10', 'word4', 'url-5'),
(12, '2019-10-10', 'word4', 'url-6'),
(13, '2019-10-11', 'word4', 'url-5'),
(14, '2019-10-10', 'word5', 'url-7'),
(15, '2019-10-09', 'word5', 'url-8'),
(16, '2019-10-11', 'word5', 'url-7')]) d))
WHERE date >= '2019-10-01' AND date < '2019-11-01'
GROUP BY searchword, date)
GROUP BY searchword
HAVING count() > 1)
WHERE isExchanged;
/* Result:
┌─searchword─┐
│ word5 │
│ word1 │
└────────────┘
*/https://stackoverflow.com/questions/60053800
复制相似问题