二百七十六、ClickHouse——Hive和ClickHouse非常不同的DWS指标数据SQL语句

一、目的

在完成数据之后对业务指标进行分析,Hive和ClickHouseSQL真不一样

二、部分业务指标表

2.1 统计数据流量表1天周期

2.1.1 Hive中原有代码

2.1.1.1 Hive中建表语句
--1、统计数据流量表——动态分区——1天周期
create  table  if not exists  hurys_db.dws_statistics_volume_1day(device_no        string         comment '设备编号',scene_name       string         comment '场景名称',lane_no          int            comment '车道编号',lane_direction   string         comment '车道流向',section_no       int            comment '断面编号',device_direction string         comment '雷达朝向',sum_volume_day   int            comment '每天总流量',week_day         string         comment '周几',month            string         comment '月份'
)
comment '统计数据流量表——动态分区——1天周期'
partitioned by (day string)
stored as orc
;
2.1.1.2 Hive中SQL语句
--动态加载数据
insert  overwrite  table  hurys_db.dws_statistics_volume_1day  partition(day)
selectdwd_st.device_no,dwd_sc.scene_name,dwd_st.lane_no,dwd_rl.lane_direction,dwd_st.section_no,dwd_rc.device_direction,sum(volume_sum) sum_volume_day,case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'else '周日' end as week_day,substr(day,1,7) month,day
from hurys_db.dwd_statistics as dwd_stright join hurys_db.dwd_radar_lane as dwd_rlon dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_noright join hurys_db.dwd_device_scene as dwd_dson dwd_ds.device_no=dwd_st.device_noright join hurys_db.dwd_scene as dwd_scon dwd_sc.scene_id = dwd_ds.scene_idright join hurys_db.dwd_radar_config as dwd_rcon dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null   and   dwd_st.day='2024-09-05'
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'else '周日' end, day
;

2.1.2 ClickHouse中现有代码

2.1.2.1 ClickHouse中表结构
--1、统计数据流量表——动态分区——1天周期
create  table  if not exists  hurys_jw.dws_statistics_volume_1day(device_no        String                   comment '设备编号',scene_name       String                   comment '场景名称',lane_no          Nullable(Int32)          comment '车道编号',lane_direction   Nullable(String)         comment '车道流向',section_no       Nullable(Int32)          comment '断面编号',device_direction Nullable(String)         comment '雷达朝向',sum_volume_day   Nullable(Int32)          comment '每天总流量',week_day         Nullable(String)         comment '周几',month            Nullable(String)         comment '月份',day              Date                    comment '日期'
)
ENGINE = MergeTree
PARTITION BY day
PRIMARY KEY day
ORDER BY day
SETTINGS index_granularity = 8192;
2.1.2.2 ClickHouse中SQL语句
--动态加载数据
selectdwd_st.device_no,dwd_sc.scene_name,dwd_st.lane_no,dwd_rl.lane_direction,dwd_st.section_no,dwd_rc.device_direction,sum(volume_sum) sum_volume_day,
       case when toDayOfWeek(create_time) = 1 then '周一'when toDayOfWeek(create_time) = 2 then '周二'when toDayOfWeek(create_time) = 3 then '周三'when toDayOfWeek(create_time) = 4 then '周四'when toDayOfWeek(create_time) = 5 then '周五'when toDayOfWeek(create_time) = 6 then '周六'when toDayOfWeek(create_time) = 7 then '周日'end as week_day,
    concat(toString(toYear(dwd_st.day)), '-', lpad(toString(toMonth(dwd_st.day)), 2, '0')) AS month,cast(dwd_st.day as String) day
from hurys_jw.dwd_statistics as dwd_stright join hurys_jw.dwd_radar_lane as dwd_rlon dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_noright join hurys_jw.dwd_device_scene as dwd_dson dwd_ds.device_no=dwd_st.device_noright join hurys_jw.dwd_scene as dwd_scon dwd_sc.scene_id = dwd_ds.scene_idright join hurys_jw.dwd_radar_config as dwd_rcon dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null and dwd_st.lane_no is not null  and   dwd_st.day >= ?
group by  dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when toDayOfWeek(create_time) = 1 then '周一'when toDayOfWeek(create_time) = 2 then '周二'when toDayOfWeek(create_time) = 3 then '周三'when toDayOfWeek(create_time) = 4 then '周四'when toDayOfWeek(create_time) = 5 then '周五'when toDayOfWeek(create_time) = 6 then '周六'when toDayOfWeek(create_time) = 7 then '周日'end, dwd_st.day
;

2.2 统计数据流量表5分钟周期

2.2.1 Hive中原有代码

2.2.1.1 Hive中建表语句
--5、统计数据流量表——动态分区——5分钟周期
create  table  if not exists  hurys_db.dws_statistics_volume_5min(device_no        string         comment '设备编号',scene_name       string         comment '场景名称',lane_no          int            comment '车道编号',lane_direction   string         comment '车道流向',section_no       int            comment '断面编号',device_direction string         comment '雷达朝向',sum_volume_5min  int            comment '每5分钟总流量',start_time       timestamp      comment '开始时间'
)
comment '统计数据流量表——动态分区——5分钟周期'
partitioned by (day string)
stored as orc
;
2.2.1.2 Hive中SQL语句
--动态加载数据
insert  overwrite  table  hurys_db.dws_statistics_volume_5min  partition(day)
selectdwd_st.device_no,dwd_sc.scene_name,dwd_st.lane_no,dwd_rl.lane_direction,dwd_st.section_no,dwd_rc.device_direction,sum(volume_sum)   sum_volume_5min,case   when  minute(create_time ) < 5 thenconcat(substr(create_time, 1, 14), '00:00')when minute(create_time) >=5 and minute(create_time) <10 thenconcat(substr(create_time, 1, 14), '05:00')when minute(create_time) >=10 and minute(create_time) <15 thenconcat(substr(create_time, 1, 14), '10:00')when minute(create_time) >=15 and minute(create_time) <20 thenconcat(substr(create_time, 1, 14), '15:00')when minute(create_time) >=20 and minute(create_time) <25 thenconcat(substr(create_time, 1, 14), '20:00')when minute(create_time) >=25 and minute(create_time) <30 thenconcat(substr(create_time, 1, 14), '25:00')when minute(create_time) >=30 and minute(create_time) <35 thenconcat(substr(create_time, 1, 14), '30:00')when minute(create_time) >=35 and minute(create_time) <40 thenconcat(substr(create_time, 1, 14), '35:00')when minute(create_time) >=40 and minute(create_time) <45 thenconcat(substr(create_time, 1, 14), '40:00')when minute(create_time) >=45 and minute(create_time) <50 thenconcat(substr(create_time, 1, 14), '45:00')when minute(create_time) >=50 and minute(create_time) <55 thenconcat(substr(create_time, 1, 14), '50:00')elseconcat(substr(create_time, 1, 14), '55:00') end as start_time,day
from hurys_db.dwd_statistics as dwd_stright join hurys_db.dwd_radar_lane as dwd_rlon dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_noright join hurys_db.dwd_device_scene as dwd_dson dwd_ds.device_no=dwd_st.device_noright join hurys_db.dwd_scene as dwd_scon dwd_sc.scene_id = dwd_ds.scene_idright join hurys_db.dwd_radar_config as dwd_rcon dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null   and   dwd_st.day='2024-09-05'
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case   when  minute(create_time ) < 5 thenconcat(substr(create_time, 1, 14), '00:00')when minute(create_time) >=5 and minute(create_time) <10 thenconcat(substr(create_time, 1, 14), '05:00')when minute(create_time) >=10 and minute(create_time) <15 thenconcat(substr(create_time, 1, 14), '10:00')when minute(create_time) >=15 and minute(create_time) <20 thenconcat(substr(create_time, 1, 14), '15:00')when minute(create_time) >=20 and minute(create_time) <25 thenconcat(substr(create_time, 1, 14), '20:00')when minute(create_time) >=25 and minute(create_time) <30 thenconcat(substr(create_time, 1, 14), '25:00')when minute(create_time) >=30 and minute(create_time) <35 thenconcat(substr(create_time, 1, 14), '30:00')when minute(create_time) >=35 and minute(create_time) <40 thenconcat(substr(create_time, 1, 14), '35:00')when minute(create_time) >=40 and minute(create_time) <45 thenconcat(substr(create_time, 1, 14), '40:00')when minute(create_time) >=45 and minute(create_time) <50 thenconcat(substr(create_time, 1, 14), '45:00')when minute(create_time) >=50 and minute(create_time) <55 thenconcat(substr(create_time, 1, 14), '50:00')elseconcat(substr(create_time, 1, 14), '55:00') end, day
;

2.2.2 ClickHouse中现有代码

2.2.2.1 ClickHouse中表结构
--5、统计数据流量表——动态分区——5分钟周期
create  table  if not exists  hurys_jw.dws_statistics_volume_5min(device_no        String                   comment '设备编号',scene_name       String                   comment '场景名称',lane_no          Nullable(Int32)          comment '车道编号',lane_direction   Nullable(String)         comment '车道流向',section_no       Nullable(Int32)          comment '断面编号',device_direction Nullable(String)         comment '雷达朝向',sum_volume_5min  Nullable(Int32)          comment '每5分钟总流量',start_time       DateTime                 comment '开始时间',day              Date                    comment '日期'
)
ENGINE = MergeTree
PARTITION BY day
PRIMARY KEY day
ORDER BY day
SETTINGS index_granularity = 8192;
2.2.2.2 ClickHouse中SQL语句
--动态加载数据
selectdwd_st.device_no,dwd_sc.scene_name,dwd_st.lane_no,dwd_rl.lane_direction,dwd_st.section_no,dwd_rc.device_direction,sum(volume_sum)   sum_volume_5min,
        toDateTime(concat(toString(toDate(create_time)),' ',lpad(toString(extract(hour FROM create_time)), 2, '0'),':',CASEWHEN extract(minute FROM create_time) < 5 THEN '00'WHEN extract(minute FROM create_time) >= 5 AND extract(minute FROM create_time) < 10 THEN '05'WHEN extract(minute FROM create_time) >= 10 AND extract(minute FROM create_time) < 15 THEN '10'WHEN extract(minute FROM create_time) >= 15 AND extract(minute FROM create_time) < 20 THEN '15'WHEN extract(minute FROM create_time) >= 20 AND extract(minute FROM create_time) < 25 THEN '20'WHEN extract(minute FROM create_time) >= 25 AND extract(minute FROM create_time) < 30 THEN '25'WHEN extract(minute FROM create_time) >= 30 AND extract(minute FROM create_time) < 35 THEN '30'WHEN extract(minute FROM create_time) >= 35 AND extract(minute FROM create_time) < 40 THEN '35'WHEN extract(minute FROM create_time) >= 40 AND extract(minute FROM create_time) < 45 THEN '40'WHEN extract(minute FROM create_time) >= 45 AND extract(minute FROM create_time) < 50 THEN '45'WHEN extract(minute FROM create_time) >= 50 AND extract(minute FROM create_time) < 55 THEN '50'ELSE '55'END,':00'))  as start_time,cast(dwd_st.day as String) day
from hurys_jw.dwd_statistics as dwd_stright join hurys_jw.dwd_radar_lane as dwd_rlon dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_noright join hurys_jw.dwd_device_scene as dwd_dson dwd_ds.device_no=dwd_st.device_noright join hurys_jw.dwd_scene as dwd_scon dwd_sc.scene_id = dwd_ds.scene_idright join hurys_jw.dwd_radar_config as dwd_rcon dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null   and  dwd_st.lane_no is not null   and   dwd_st.day >= ?
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, toDateTime(concat(toString(toDate(create_time)),' ',lpad(toString(extract(hour FROM create_time)), 2, '0'),':',CASEWHEN extract(minute FROM create_time) < 5 THEN '00'WHEN extract(minute FROM create_time) >= 5 AND extract(minute FROM create_time) < 10 THEN '05'WHEN extract(minute FROM create_time) >= 10 AND extract(minute FROM create_time) < 15 THEN '10'WHEN extract(minute FROM create_time) >= 15 AND extract(minute FROM create_time) < 20 THEN '15'WHEN extract(minute FROM create_time) >= 20 AND extract(minute FROM create_time) < 25 THEN '20'WHEN extract(minute FROM create_time) >= 25 AND extract(minute FROM create_time) < 30 THEN '25'WHEN extract(minute FROM create_time) >= 30 AND extract(minute FROM create_time) < 35 THEN '30'WHEN extract(minute FROM create_time) >= 35 AND extract(minute FROM create_time) < 40 THEN '35'WHEN extract(minute FROM create_time) >= 40 AND extract(minute FROM create_time) < 45 THEN '40'WHEN extract(minute FROM create_time) >= 45 AND extract(minute FROM create_time) < 50 THEN '45'WHEN extract(minute FROM create_time) >= 50 AND extract(minute FROM create_time) < 55 THEN '50'ELSE '55'END,':00')), cast(dwd_st.day as String)
;

就先这样,反正ClickHouse和Hive的SQL语句非常非常不一样!!!

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.rhkb.cn/news/461487.html

如若内容造成侵权/违法违规/事实不符,请联系长河编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

在IDEA2024中生成SpringBoot模板

1、创建新项目 根据自己想要创建的工程类型选择&#xff0c;这里创建的时web工程 生成项目&#xff1a; 注意&#xff1a;SpringBoot只会扫描主程序所在的包及其下面的子包

物流行业信息化整体规划方案|117页PPT

文件是关于“物流行业信息化整体规划方案”的详细规划报告&#xff0c;涵盖了物流信息化咨询项目的规划报告&#xff0c;包括业务理解与需求分析、信息化现状分析、信息化蓝图规划以及实施路径与保障措施等多个方面。以下是对文档内容的总结&#xff1a; 1. 引言&#xff1a;信…

opencv优秀文章集合

文章目录 一、 CV领域1.1 图像处理1.2 目标检测与识别1.3 图像分割、目标追踪1.4 姿态估计1.5 3D视觉1.6 图像生成1.7 机器视觉1.8 其它 二、 nlp三、语音四、推荐系统 《OpenCV优秀文章集合》《OpenCV系列课程一&#xff1a;图像处理入门&#xff08;读写、拆分合并、变换、注…

Windows转Mac过渡指南

最近由于工作原因开始使用mac电脑&#xff0c;说实话刚拿到手的时候&#xff0c;window党表示真的用不惯。坚持用一下午之后&#xff0c;发现真的yyds&#xff0c;这篇文章说说mac电脑的基本入门指南。 1. 不会使用mac的触摸板&#xff0c;接上鼠标发现滚轮和windows是反的。 …

字符串逆序(c语言)

错误代码 #include<stdio.h>//字符串逆序 void reverse(char arr[], int n) {int j 0;//采用中间值法//访问数组中第一个元素和最后一个元素//交换他们的值&#xff0c;从而完成了字符串逆序//所以这个需要临时变量for (j 0; j < n / 2; j){char temp arr[j];arr[…

安全成为大模型的核心;大模型安全的途径:大模型对齐

目录 安全成为大模型的核心 大模型安全的途径:大模型对齐 人类反馈强化学习(RLHF) 直接偏好优化(DPO) 安全成为大模型的核心 大模型安全的途径:大模型对齐 大模型对齐技术(Alignment Techniques for Large Language Models)是确保大规模语言模型(例如GPT-4)的输…

K8s企业应用之容器化迁移

#作者&#xff1a;曹付江 K8s企业应用之容器化迁移 Kubernetes&#xff08;K8s&#xff09;中的企业应用容器化迁移是一个复杂但重要的过程&#xff0c;平滑的迁移应用&#xff0c;可以让开发、运维、测试人员循序渐进的学习和掌握Kubernetes&#xff0c;通常包括以下步骤&am…

redis详细教程(3.hash和set类型)

hash Redis中的Hash是一种数据结构&#xff0c;用于存储键值对集合。在Redis中&#xff0c;Hash非常适合表示对象&#xff0c;其中对象的每个字段都对应一个键值对。以下是关于Redis中Hash的详细讲解&#xff1a; 特点&#xff1a; 1. 键值对集合&#xff1a;Hash是一个包含…

linux 安装php扩展:xlswriter

这里以xlswriter扩展为例 进入官方扩展&#xff1a;https://pecl.php.net查询自己php对应版本的扩展包 下载扩展 wget https://pecl.php.net/get/xlswriter-1.5.5.tgz 解压扩展 tar -zxvf xlswriter-1.5.5.tgz 进入扩展目录 cd xlswriter-1.5.5 查找对应php版本的phpiz…

SSID,即Service Set Identifier(服务设置的表示符号)

一、什么是SSID&#xff1f; SSID&#xff0c;即Service Set Identifier&#xff0c;是无线网络中的一个标识符&#xff0c;用于区分不同的无线网络。它可以理解为无线网络的名称&#xff0c;当我们在手机或电脑上搜索可用的无线网络时&#xff0c;就是通过SSID来识别和连接的…

LabVIEW过程控制实验平台

A3000实验平台通过LabVIEW开发&#xff0c;实现了过程控制的虚拟仿真与实时通信&#xff0c;显著提高了教学与实验的互动性和效率。该平台采用模块化设计&#xff0c;支持多种控制策略的实验教学&#xff0c;克服了传统实验设备的不足。项目背景 目前高校过程控制实验设备普遍…

强大的文本编辑器Notepad++8.4.6 最新版

Notepad最新版是一款多功能的代码编辑工具。Notepad官方版支持27种编程语言&#xff0c;涵盖C、C 、Java 、C#,、XML、 HTML,、PHP、python等等&#xff0c;能够帮助程序员提高编辑效率。Notepad软件支持python与sql代码高亮功能&#xff0c;并且免费开源&#xff0c;能够完美地…

Halcon 2D测量Metrology找线/圆/矩形/椭圆

通过2D测量&#xff0c;可以获取物体的范围、方向、角度、位置、尺寸和个数等特征。其中&#xff0c;Halcon的2D Metrology模块提供了亚像素级别的卡尺测量功能&#xff0c;可以测量的几何形状包括直线、圆、椭圆、矩形等。对于2D度量&#xff0c;必须提供要测量的对象的位置&a…

PostgreSQL的学习心得和知识总结(一百五十七)|新的 COPY 选项 LOG_VERBOSITY

目录结构 注&#xff1a;提前言明 本文借鉴了以下博主、书籍或网站的内容&#xff0c;其列表如下&#xff1a; 1、参考书籍&#xff1a;《PostgreSQL数据库内核分析》 2、参考书籍&#xff1a;《数据库事务处理的艺术&#xff1a;事务管理与并发控制》 3、PostgreSQL数据库仓库…

分析 std::optional 的使用与常见错误

文章目录 引言常见错误及解决方案1. 错误使用 std::optional 变量进行算术运算2. 错误检查 std::optional 是否有值3. 忽视 std::optional 的默认值 结论 引言 std::optional 是 C17 引入的一个模板类&#xff0c;用于表示可能有也可能没有值的情况。它特别适用于函数返回值&a…

大模型中的token是什么;常见大语言模型的 token 情况

目录 大模型中的token是什么 常见大语言模型的 token 情况 大模型中的token是什么 定义 在大模型中,token 是文本处理的基本单位。它可以是一个字、一个词,或者是其他被模型定义的语言单元。简单来说,模型在理解和生成文本时,不是以完整的句子或段落为单位进行一次性处理…

深度了解flink(七) JobManager(1) 组件启动流程分析

前言 JobManager是Flink的核心进程&#xff0c;主要负责Flink集群的启动和初始化&#xff0c;包含多个重要的组件(JboMaster&#xff0c;Dispatcher&#xff0c;WebEndpoint等)&#xff0c;本篇文章会基于源码分析JobManagr的启动流程&#xff0c;对其各个组件进行介绍&#x…

深度学习模型入门教程指南

在当前的人工智能生成内容&#xff08;AIGC&#xff09;领域中&#xff0c;深度学习模型无疑是支撑其技术核心的关键组件。深度学习模型的广泛应用极大地推动了图像生成、自然语言处理和自动化工作流的发展&#xff0c;本文将从多个角度介绍深度学习模型的概念、构建过程、实际…

C语言指针的介绍

零.导言 在日常生活中&#xff0c;我们常常在外出时居住酒店&#xff0c;细心的你一定能发现酒店不同的房间上有着不同的门牌号&#xff0c;上面写着像308&#xff0c;512之类的数字。当你定了酒店之后&#xff0c;你就会拿到一个写有门牌号的钥匙&#xff0c;凭着钥匙就能进入…

【Spring MVC】DispatcherServlet 请求处理流程

一、 请求处理 Spring MVC 是 Spring 框架的一部分&#xff0c;用于构建 Web 应用程序。它遵循 MVC&#xff08;Model-View-Controller&#xff09;设计模式&#xff0c;将应用程序分为模型&#xff08;Model&#xff09;、**视图&#xff08;View&#xff09;和控制器&#x…