牛客网SQL训练4—SQL进阶挑战

2023-12-27 23:56:58


一、增删改操作

1. 插入记录

题目1:插入记录 (一)】
在这里插入图片描述

插入记录的方式汇总:
1. 普通插入(全字段):INSERT INTO table_name VALUES (value1, value2, …)
?普通插入(限定字段):INSERT INTO table_name (column1, column2, …) VALUES (value1, value2, …)
?多条一次性插入:INSERT INTO table_name (column1, column2, …) VALUES (value1_1, value1_2, …), (value2_1, value2_2, …), …
2. 从另一个表导入:INSERT INTO table_name SELECT * FROM table_name2 [WHERE key=value]
3. 带更新的插入:REPLACE INTO table_name VALUES (value1, value2, …) (注意这种原理是检测到主键或唯一性索引键重复就删除原记录后重新插入)

-- 因为id为自增,所以插入两条数据除自增id列以外的列
-- 提交时间可以让它自己计算
insert into exam_record(uid,exam_id,start_time,submit_time,score) values
(1001,9001,'2021-09-01 22:11:12','2021-09-01 22:11:12'+interval 50 minute,90),
(1002,9002,'2021-09-04 07:01:02',null,null)
;

在这里插入图片描述


题目2:插入记录 (二)】
在这里插入图片描述

-- 第一列为自增主键列,不能直接复制过去
-- 只复制2021年之前的记录 & 只复制已完成了的试题作答纪录:用提交时间
insert into exam_record_before_2021(uid,exam_id,start_time,submit_time,score)
select uid,exam_id,start_time,submit_time,score from exam_record
where substr(submit_time,1,4)<='2021'
;

在这里插入图片描述


题目3:插入记录 (三)】
在这里插入图片描述

replace into examination_info(exam_id,tag,difficulty,duration,release_time) values
(9003,'SQL','hard',90,'2021-01-01 00:00:00')
;

在这里插入图片描述

2. 更新记录

题目1:更新记录 (一)】
在这里插入图片描述

修改记录的方式汇总:
1. 设置为新值:UPDATE table_name SET column_name=new_value [, column_name2=new_value2] [WHERE column_name3=value3]
2. 根据已有值替换:UPDATE table_name SET key1=replace(key1, ‘查找内容’, ‘替换成内容’) [WHERE column_name3=value3]

-- 方法1
update examination_info
set tag='Python'
where tag='PYTHON'
;
-- 方法2
update examination_info
set tag=replace(tag,'PYTHON','Python')
where tag='PYTHON'
;

-- 思维扩展:第二种方式不仅可用于整体替换,还能做子串替换,例如要实现将tag中所有的PYTHON替换为Python(如CPYTHON=>CPython),可写作:
update examination_info
set tag = replace(tag, "PYTHON", "Python")
where tag like "%PYTHON%";
;

在这里插入图片描述


题目2:更新记录 (二)】
在这里插入图片描述

-- 未完成:分数为null
update exam_record
set submit_time='2099-01-01 00:00:00',score=0
where substr(start_time,1,10)<'2021-09-01' and score is null;
;

在这里插入图片描述

3. 删除记录

题目1:删除记录 (一)】
在这里插入图片描述

删除记录的方式汇总
1. 根据条件删除:DELETE FROM tb_name [WHERE options] [ [ ORDER BY fields ] LIMIT n ]
2. 全部删除(表清空,包含自增计数器重置):TRUNCATE TABLE tb_name

-- 时间差:TIMESTAMPDIFF(interval, time_start, time_end)可计算time_start-time_end的时间差,单位以指定的interval为准,
-- 常用可选:SECOND 秒
-- 		 MINUTE 分钟(返回秒数差除以60的整数部分)
-- 		 HOUR 小时(返回秒数差除以3600的整数部分)
-- 		 DAY 天数(返回秒数差除以3600*24的整数部分)
-- 		 MONTH 月数
-- 		 YEAR 年数

delete from exam_record
where timestampdiff(minute,start_time,submit_time)<5 and score<60
;

在这里插入图片描述

题目2:删除记录 (二)】
在这里插入图片描述

delete from exam_record
where submit_time is null or timestampdiff(minute,start_time,submit_time)<5
order by start_time
limit 3
;

在这里插入图片描述

题目3:删除记录 (三)】
在这里插入图片描述

truncate table exam_record;

在这里插入图片描述

二、表与索引操作

1. 表的创建、修改与删除

题目1:创建一张新表】
在这里插入图片描述

表的创建、修改与删除
1.1 直接创建表
CREATE TABLE [IF NOT EXISTS] tb_name – 不存在才创建,存在就跳过
(column_name1 data_type1 – 列名和类型必选
[ PRIMARY KEY – 可选的约束,主键
| FOREIGN KEY – 外键,引用其他表的键值
| AUTO_INCREMENT – 自增ID
| COMMENT comment – 列注释(评论)
| DEFAULT default_value – 默认值
| UNIQUE – 唯一性约束,不允许两条记录该列值相同
| NOT NULL – 该列非空
], …
) [CHARACTER SET charset] – 字符集编码
[COLLATE collate_value] – 列排序和比较时的规则(是否区分大小写等)
1.2 从另一张表复制表结构创建表
CREATE TABLE tb_name LIKE tb_name_old
1.3 从另一张表的查询结果创建表
CREATE TABLE tb_name AS SELECT * FROM tb_name_old WHERE options
2.1 修改表:ALTER TABLE 表名 修改选项
选项集合:
{ ADD COLUMN <列名> <类型> [first|after 字段名] – 增加列
| CHANGE COLUMN <旧列名> <新列名> <新列类型> – 修改列名
| MODIFY COLUMN <列名> <新类型> [新约束] – 修改列类型或约束
| MODIFY COLUMN <列名> <新类型> first – 将某一列放到第一列
| DROP COLUMN <列名> – 删除列
| RENAME TO <新表名> – 修改表名
| CHARACTER SET <字符集名> – 修改字符集
| COLLATE <校对规则名> } – 修改校对规则(比较和排序时用到)
3.1 删除表:DROP TABLE [IF EXISTS] 表名1 [ ,表名2]。

create table user_info_vip(
id     			INT(11)  		PRIMARY KEY AUTO_INCREMENT 		COMMENT '自增ID'
,uid   			INT(11)  		UNIQUE NOT NULL         		COMMENT '用户ID'
,nick_name   	VARCHAR(64)      								COMMENT '昵称'
,achievement	INT(11)     	DEFAULT 0 						COMMENT '成就值'
,level			INT(11)						   					COMMENT '用户等级'
,job			VARCHAR(32)					  					COMMENT '职业方向'
,register_time	DATETIME		DEFAULT CURRENT_TIMESTAMP		COMMENT '注册时间'
) CHARACTER SET utf8 COLLATE utf8_general_ci COMMENT = '优质用户信息表'
;

在这里插入图片描述

题目2:修改表】
在这里插入图片描述

-- 易出错的地方:当想要设置default的值为数字0时,前面必须加上<列类型>为INT,否则会报default的错误
alter table user_info add column school varchar(15) after level;
alter table user_info change job profession varchar(10);
alter table user_info modify column achievement int(11) default 0;

在这里插入图片描述

题目3:删除表】
在这里插入图片描述

drop table if exists 
exam_record_2011, 
exam_record_2012, 
exam_record_2013, 
exam_record_2014;

在这里插入图片描述

2. 索引的创建、删除

题目1:创建索引】
在这里插入图片描述

索引创建、删除与使用
1.1 create方式创建索引
CREATE
[UNIQUE – 唯一索引
| FULLTEXT – 全文索引
] INDEX index_name ON table_name – 不指定唯一或全文时默认普通索引
(column1[(length) [DESC|ASC]] [,column2,…]) – 可以对多列建立组合索引
1.2 alter方式创建索引
ALTER TABLE tb_name ADD [UNIQUE | FULLTEXT] [INDEX] index_content(content) – 主键索引
2.1 drop方式删除索引
DROP INDEX <索引名> ON <表名>
2.2 alter方式删除索引
ALTER TABLE <表名> DROP INDEX <索引名>
3.1 索引的使用
(1)索引使用时满足最左前缀匹配原则,即对于组合索引(col1, col2),在不考虑引擎优化时,条件必须是col1在前col2在后,或者只使用col1,索引才会生效;
(2)索引不包含有NULL值的列
(3)一个查询只使用一次索引,where中如果使用了索引,order by就不会使用
(4)like做字段比较时只有前缀确定时才会使用索引
(5)在列上进行运算后不会使用索引,如year(start_time)<2020不会使用start_time上的索引
3.2 不同索引的区别
(1)主键索引: 它是一种特殊的唯一索引,不允许有空值。一般是在建表的时候指定了主键,就会创建主键索引, CREATE INDEX不能用来创建主键索引,使用 ALTER TABLE来代替。
(2)唯一索引:与普通索引类似,不同的就是:索引列的值必须唯一,但允许有空值。如果是组合索引,则列值的组合必须一。
(3)普通索引:这是最基本的索引,它没有任何限制。
(4)全文索引:FULLTEXT索引用于全文搜索。只有InnoDB和 MyISAM存储引擎支持 FULLTEXT索引和仅适用于 CHAR, VARCHAR和 TEXT列。

CREATE INDEX 			idx_duration 		ON examination_info(duration);
CREATE UNIQUE INDEX 	uniq_idx_exam_id 	ON examination_info(exam_id);
CREATE FULLTEXT INDEX 	full_idx_tag 		ON examination_info(tag);

在这里插入图片描述


题目2:删除索引】

-- 方法1:
DROP INDEX uniq_idx_exam_id ON examination_info;
DROP INDEX full_idx_tag ON examination_info;

-- 方法2:
ALTER TABLE examination_info DROP INDEX uniq_idx_exam_id;
ALTER TABLE examination_info DROP INDEX full_idx_tag;

三、聚合分组查询

1. 聚合函数

题目1:SQL类别高难度试卷得分的截断平均值】

select 
	tag	
	,difficulty	
	,round((sum_score-max_score-min_score)/(score_cnt-2),1) clip_avg_score
from (
		select 
			tag
			,difficulty
			,count(score) score_cnt 
			,sum(score) sum_score
			,max(score) max_score
			,min(score) min_score
		from (
				select 
					exam_id
					,uid
					,score
				from exam_record
		) a join (
					select 
						exam_id
						,tag 
						,difficulty
					from examination_info
					where tag='SQL' and difficulty='hard'
		) b on a.exam_id=b.exam_id
		group by tag,difficulty
) a1
;

题目2:统计作答次数】

select 
	count(1) total_pv	
	,count(submit_time) complete_pv	
	,count(distinct if(submit_time is null,exam_id=null,exam_id)) complete_exam_cnt
from exam_record
;

题目3:得分不小于平均分的最低分(这题有bug,运行不通过)】

select 
	min(score) min_score_over_avg
from (
		select 
			score  		
			,avg(score) over()  avg_score
		from (
				select 
					exam_id
					,score
				from exam_record
		) a join (
					select 
						exam_id
						,tag
					from examination_info
					where tag='SQL'
		) b on a.exam_id=b.exam_id
) a1
where score>=avg_score
;

2. 分组查询

题目1:平均活跃天数和月活人数】

select
	month
	,round(avg(day_m),2) avg_active_days
	,round(count(distinct uid),2) mau
from (
		select
			date_format(submit_time,'%Y%m') month
			,uid 
			,count(distinct substr(submit_time,1,10)) day_m
		from exam_record
		where substr(submit_time,1,4)='2021'
		group by date_format(submit_time,'%Y%m'),uid
) a 
group by month
;

题目2:月总刷题数和日均刷题数】

-- 使用 WITH ROLLUP,此函数是对聚合函数进行求和,注意 with rollup是对 group by 后的第一个字段,进行分组计算。
select 
	ifnull(submit_month,'2021汇总') submit_month
	,month_q_cnt
	,round(avg_day_q_cnt,3) avg_day_q_cnt
from (
			select 
				date_format(submit_time,'%Y%m') submit_month
				,count(question_id)  month_q_cnt
				,count(question_id)/max(day(last_day(submit_time))) avg_day_q_cnt
			from practice_record
			where substr(submit_time,1,4)='2021'
			group by date_format(submit_time,'%Y%m')
			with rollup 	
) a 
order by submit_month
;

题目3:未完成试卷数大于1的有效用户】

-- 对于每条作答tag,用:连接日期和tag:concat_ws(':', date(start_time), tag)
-- 对于一个人(组内)的多条作答,用;连接去重后的作答记录:group_concat(distinct concat_ws(':', date(start_time), tag) SEPARATOR ';')
select
	uid
	,sum(if(submit_time is null,1,0)) incomplete_cnt
	,sum(if(submit_time is not null,1,0)) complete_cnt   
	,group_concat(distinct concat_ws(':', date(start_time), tag) SEPARATOR ';') detail
from (
		select
			uid 
			,exam_id
			,submit_time
			,start_time
		from exam_record
		where substr(start_time,1,4)='2021'
) a left join (
				select
					exam_id 
					,tag 
				from examination_info
) b on a.exam_id=b.exam_id
group by uid
having sum(if(submit_time is not null,1,0))>=1
and sum(if(submit_time is null,1,0))<5
and sum(if(submit_time is null,1,0))>1
order by incomplete_cnt desc
;

四、多表查询

1. 嵌套子查询

题目1:月均完成试卷数不小于3的用户爱作答的类别】

select
	tag
	,count(start_time) tag_cnt
from (
		select
			uid 
			,exam_id
			,submit_time
			,start_time
		from exam_record
) a left join (
				select
					exam_id 
					,tag 
				from examination_info
) b on a.exam_id=b.exam_id
where uid in (
				select
					uid
				from exam_record
				where submit_time is not null
				group by uid,substr(start_time,1,7) 
				having count(start_time)>=3
			 )
group by tag
order by tag_cnt desc
;

题目2:试卷发布当天作答人数和平均分】

select 
	c.exam_id	
	,count(distinct b.uid) uv	
	,round(avg(score),1) avg_score
from(
		select 
			uid
			,level		
		from user_info
		where level>5
) a join (
			select 
				uid
				,exam_id
				,score
				,substr(start_time,1,10) start_time
			from exam_record
) b on a.uid=b.uid
join (
		select
			exam_id
			,tag
			,substr(release_time,1,10) release_time
		from examination_info
		where tag='SQL'
) c on b.exam_id=c.exam_id
where start_time=release_time
group by c.exam_id
order by uv desc,avg_score asc
;

题目3:作答试卷得分大于过80的人的用户等级分布】

select 
	level
	,count(distinct uid) level_cnt
from (
		select 
			a.exam_id
			,a.tag
			,b.uid
			,b.score
			,c.level
		from (
				select
					exam_id
					,tag
				from examination_info
				where tag='SQL'
		) a join (
					select 
						uid
						,exam_id
						,score
					from exam_record
					where score>80
		) b on a.exam_id=b.exam_id
		join (
				select 
					uid
					,level		
				from user_info
		) c on b.uid=c.uid
) a1
group by level
order by level_cnt desc,level desc
;

2. 合并查询

题目1:每个题目和每份试卷被作答的人数和次数】

-- 先UNION后ORDER BY时,只需要在最后一个SELECT语句中包含ORDER BY;
select 
	exam_id tid
	,count(distinct uid) uv 
	,count(uid) pv
from exam_record 
group by exam_id
union all
select 
	question_id tid
	,count(distinct uid) uv 
	,count(uid) pv
from practice_record 
group by question_id
order by uv desc,pv desc
-- 先ORDER BY后UNION时,由于优先级问题,需要将子查询用括号括起来,且ORDER BY后面必须有 LIMIT`;
#-- 题目答案:
select 
	tid
	,uv
	,pv
from (
		select 
			exam_id tid
			,count(distinct uid) uv 
			,count(uid) pv
		from exam_record 
		group by exam_id
		order by uv desc,pv desc
) a 
union all
select 
	tid
	,uv
	,pv
from (
		select 
			question_id tid
			,count(distinct uid) uv 
			,count(uid) pv
		from practice_record 
		group by question_id
		order by uv desc,pv desc
) b 
;

题目2:分别满足两个活动的人】

select 
	uid 
	,'activity1' activity
from exam_record
where substr(start_time,1,4)='2021'
group by uid 
having min(score)>=85
union all 
select
	uid
	,'activity2' activity
from (
		select 
			uid
			,exam_id
			,start_time
			,submit_time
			,score 
		from exam_record 
		where substr(start_time,1,4)='2021'
		and score>80
) a  join (
				select 
					exam_id
					,difficulty
					,duration
				from examination_info
				where difficulty='hard'
) b on a.exam_id=b.exam_id
where timestampdiff(second,start_time,submit_time)<=duration*30
order by uid
;

3. 连接查询

题目1:满足条件的用户的试卷完成数和题目练习数】

select 
	a.uid
	,ifnull(exam_cnt,0) exam_cnt
	,ifnull(question_cnt,0) question_cnt
from (
		-- 红名大佬2021年试卷总完成次数
		select 
			uid	
			,count(submit_time) exam_cnt	
		from exam_record 
		where substr(start_time,1,4)='2021'
		and uid in (
						-- 高难度SQL试卷得分平均值大于80并且是7级的红名大佬
						select 
							c.uid
						from (
								-- 高难度SQL试卷
								select 
									exam_id
								from examination_info
								where difficulty='hard'
								and tag='SQL' 
						) a join (
									select 
										uid
										,exam_id
										,score
									from exam_record
						) b on a.exam_id=b.exam_id
						join (
								-- 7级的红名大佬
								select 
									uid 
								from user_info
								where level=7
						) c on b.uid=c.uid 
						group by c.uid 
						having avg(score)>80
					) 
		group by uid
) a left join (
				-- 红名大佬2021年试卷总完成次数
				select 
					 uid
					 ,count(submit_time) question_cnt
				from practice_record
				where substr(submit_time,1,4)='2021'
				group by uid 
) b on a.uid=b.uid
order by exam_cnt asc,question_cnt desc
;

题目2:每个6/7级用户活跃情况】

-- 活跃天数考虑跨天的情况
select 
	a.uid 											-- 每个6/7级用户
	,ifnull(e.active_d,0) 	act_month_total			-- 总活跃月份数
	,ifnull(d.active_d,0) 	act_days_2021			-- 2021年活跃天数
	,ifnull(b.active_d,0) 	act_days_2021_exam		-- 2021年试卷作答活跃天数
	,ifnull(c.active_d,0)   act_days_2021_question	-- 2021年答题作答活跃天数
from (
		-- 每个6/7级用户
		select
			uid 
		from user_info
		where level in ('6','7')
) a left join (
				-- 2021年试卷作答活跃天数
				select 
					uid 
					,count(active_d) active_d
				from (
						select 
							uid 
							,substr(start_time,1,10) active_d 
						from exam_record
						where substr(start_time,1,4)='2021'
						group by uid,substr(start_time,1,10)
						union 
						select 
							uid 
							,substr(submit_time,1,10) active_d 
						from exam_record
						where substr(submit_time,1,4)='2021'
						group by uid,substr(submit_time,1,10) 
				) bb
				group by uid 
) b on a.uid=b.uid 
left join (
			-- 2021年答题作答活跃天数
			select 
				uid 
				,count(active_d) active_d
			from (
					select 
						uid 
						,substr(submit_time,1,10) active_d
					from practice_record
					where substr(submit_time,1,4)='2021'
					group by uid,substr(submit_time,1,10) 
			) cc 
			group by uid 
) c on a.uid=c.uid
left join (
			-- 2021年活跃天数
	  		select 
	  			uid 
	  			,count(active_d) active_d
	  		from (
					select 
						uid 
						,substr(start_time,1,10) active_d 
					from exam_record
					where substr(start_time,1,4)='2021'
					group by uid,substr(start_time,1,10)
					union 
					select 
						uid 
						,substr(submit_time,1,10) active_d 
					from exam_record
					where substr(submit_time,1,4)='2021'
					group by uid,substr(submit_time,1,10) 
					union 
					select 
						uid 
						,substr(submit_time,1,10) active_d
					from practice_record
					where substr(submit_time,1,4)='2021'
					group by uid,substr(submit_time,1,10) 
			) dd 
			group by uid 
) d on a.uid=d.uid
left join (
			select 
				uid 
				,count(active_d) active_d
			from (
					-- 2021年活跃天数
					select 
						uid 
						,substr(start_time,1,7) active_d 
					from exam_record
					group by uid,substr(start_time,1,7)
					union 
					select 
						uid 
						,substr(submit_time,1,7) active_d 
					from exam_record
					group by uid,substr(submit_time,1,7) 
					union 
					select 
						uid 
						,substr(submit_time,1,7) active_d
					from practice_record
					group by uid,substr(submit_time,1,7) 
			) ee 
			group by uid 
) e on a.uid=e.uid
order by act_month_total desc,act_days_2021	desc
;

五、窗口函数

1. 专用窗口函数

题目1:每类试卷得分前3名】

select 
	tag
	,uid 
	,ranking 
from (
		select 
			tag  
			,uid	
			,max(score) max_score   -- 获取各个科目每个用户得分的最大值
			,min(score) min_score 	-- 获取各个科目每个用户得分的最小值
			,row_number() over(partition by tag order by max(score) desc,min(score) desc,uid desc) ranking 
		from (
				select 
					exam_id 
					,uid 
					,score
				from exam_record 
		) a left join (
						select 
							exam_id 
							,tag 
						from examination_info 
		) b on a.exam_id=b.exam_id
		group by tag,uid
) a1
where ranking <=3
;

题目2:第二快/慢用时之差大于试卷时长一半的试卷】

select distinct 
	exam_id 
	,duration
	,release_time
from (
		select 
			a.exam_id
			,diff
			,duration
			,duration*30  duration_half   
			,release_time
			,nth_value(diff,2) over(partition by exam_id order by diff rows between unbounded preceding and unbounded following) fast_two  
			,nth_value(diff,2) over(partition by exam_id order by diff desc rows between unbounded preceding and unbounded following) low_two
		from (
				select 
					exam_id
					,duration
					,release_time
				from examination_info
		) a left join (
						select 
							exam_id
							,start_time
							,submit_time
							,timestampdiff(second,start_time,submit_time) diff
						from exam_record
		) b on a.exam_id=b.exam_id
) a1
where low_two-fast_two>duration_half
order by exam_id desc
;

题目3:连续两次作答试卷的最大时间窗】

select 
	uid
	,days_window
	,round(exam_cnt/diff_days*days_window, 2)  avg_exam_cnt
from (
		select 							
			uid 
			,count(exam_id)  exam_cnt  -- 此人作答的总试卷数
			,datediff(max(start_time),min(start_time))+1  diff_days    -- 最早一次作答和最晚一次作答的相差天数
			,max(datediff(next_start_time,start_time))+1  days_window  -- 两次作答的最大时间窗
		from (
				select distinct 
					uid
					,exam_id
					,date(start_time) start_time
					,lead(date(start_time),1) over(partition by uid order by date(start_time)) next_start_time   -- 将连续的下次作答时间拼上
				from exam_record
				where substr(start_time,1,4)='2021'
				and uid in (
								-- 2021年至少有两天作答过试卷的人
								select 
									uid
								from exam_record
								where substr(start_time,1,4)='2021'
								group by uid 
								having count(distinct date(start_time))>=2
							) 
		) a
		group by uid 
) a1 
order by days_window desc,avg_exam_cnt desc
;

题目4:近三个月未完成试卷数为0的用户完成情况】

select 
	uid
	,count(submit_time)  exam_complete_cnt
from (
		select 
			uid
			,start_time
			,submit_time
			,score
			,dense_rank() over(partition by uid order by substr(start_time,1,7) desc) rn 
		from exam_record
) a
where rn<=3
group by uid
having count(start_time)=count(submit_time) 
order by exam_complete_cnt desc,uid desc
;

题目5:未完成率较高的50%用户近二个月答卷情况】

-- 题目解释中说“1001、1002、1003分别排在1.0、0.5、0.0的位置”,有意让我们用percent_rank,实际cume_dist更常用
select 
	uid  
	,start_month
	,count(start_time)  total_cnt
	,count(submit_time) complete_cnt
from(
		-- 有试卷作答记录的近三个月
		select 
			a.uid
			,a.start_time
			,a.submit_time
			,date_format(a.start_time,'%Y%m') start_month 
			,dense_rank() over(partition by a.uid order by date_format(a.start_time,'%Y%m') desc) dr
		from (
				select 
					uid
					,start_time
					,submit_time
				from exam_record
		) a join (
					-- 6级和7级用户
					select 
						uid 
					from user_info
					where level in ('6','7')
		) b on a.uid=b.uid
		join (
				-- SQL试卷上未完成率较高的50%用户
				select 
					uid
				from (
						select 
							uid
							,sum(if(submit_time is null,1,0)) 	incomplete_cnt    -- 未完成试卷数
							,count(start_time) 					total_cnt		  -- 试卷总数
							,sum(if(submit_time is null,1,0))/count(start_time) incomplete_rate    -- 未完成试卷率
							-- ,cume_dist() over(order by sum(if(submit_time is null,1,0))/count(start_time) desc) ct  
							,percent_rank() over(order by sum(if(submit_time is null,1,0))/count(start_time) desc) pr -- 未完成试卷率百分比
						from (
								select 
									exam_id
								from examination_info
								where tag='SQL'
						) a join (
									select 
										exam_id
										,uid 
										,start_time
										,submit_time
									from exam_record
						) b on a.exam_id=b.exam_id
						group by uid 
				) a1
				where pr<=0.5
		) c on a.uid=c.uid
) a1
where dr<=3
group by uid,start_month
order by uid,start_month
;

题目6:试卷完成数同比2020年的增长率及排名变化】

#第三步:取出所有需要的字段,并计算排名变化(2021-2020年,无需取绝对值)
select 
	tag
	,exam_cnt_20
	,exam_cnt_21
	,concat(round((exam_cnt_21/exam_cnt_20-1)*100,1),'%') growth_rate
	,exam_cnt_rank_20
	,exam_cnt_rank_21
-- 	,exam_cnt_21-exam_cnt_20 rank_delta
	,cast(exam_cnt_rank_21 as signed) - cast(exam_cnt_rank_20 as signed) rank_delta   -- 将value转换成INT(有符号的整数)格式
from (
		#第二步:计算growth_rate
		select 
			tag
			,max(if(year='2020',exam_cnt,null)) exam_cnt_20
			,max(if(year='2021',exam_cnt,null)) exam_cnt_21
			,max(if(year='2020',exam_cnt_rank,null)) exam_cnt_rank_20
			,max(if(year='2021',exam_cnt_rank,null)) exam_cnt_rank_21
		from (
			     #第一步:找到所有tag在2020,2021上半年的完成数和排名
					select 
						tag
						,substr(b.submit_time,1,4) year
						,count(b.submit_time) exam_cnt
						,rank() over(partition by substr(b.submit_time,1,4) order by count(b.submit_time) desc) exam_cnt_rank
					from (
								select 
									exam_id
									,tag
								from examination_info
					) a join (					
											select 
												exam_id
												,start_time
												,submit_time
											from exam_record 
											where substr(submit_time,1,10) between '2021-01-01' and '2021-06-30'
					) b on a.exam_id=b.exam_id
					group by tag,substr(b.submit_time,1,4)
					union all
					select 
						tag
						,substr(c.submit_time,1,4) year
						,count(c.submit_time) exam_cnt
						,rank() over(partition by substr(c.submit_time,1,4) order by count(c.submit_time) desc) exam_cnt_rank
					from (
								select 
									exam_id
									,tag
								from examination_info
					) a join (					
										select 
											exam_id
											,start_time
											,submit_time
										from exam_record 
										where substr(submit_time,1,10) between '2020-01-01' and '2020-06-30'
					) c on a.exam_id=c.exam_id
					group by tag,substr(c.submit_time,1,4)
		) a1
		group by tag
) a2
where exam_cnt_20 is not null and exam_cnt_21 is not null  # 第四步:筛选出2020和2021年均有完成记录的tag,并按题目要求排序
order by growth_rate desc,exam_cnt_rank_21 desc
;

2. 聚合窗口函数

题目1:对试卷得分做min-max归一化】

select 
	uid
	,exam_id
	,round(avg(min_max),0) avg_new_score		#归一化后分数平均值
from (
		select 
			uid
			,exam_id
			,score
			,if(min_score=max_score,score,(score-min_score)/(max_score-min_score)*100) min_max   #归一化后缩放到[0,100]区间
		from (
				select 
					b.uid
					,b.exam_id
					,b.score
					,min(b.score) over(partition by b.exam_id) min_score   #求每类试卷的得分最小值
					,max(b.score) over(partition by b.exam_id) max_score   #求每类试卷的得分最大值
				from (
						select 
							exam_id
							,difficulty
						from examination_info
						where difficulty ='hard'		#难度为hard
				) a join (
							select 
								uid
								,exam_id
								,score
							from exam_record
				) b on a.exam_id=b.exam_id
				where b.score is not null    			#得分不为null
		) a1
) a2
group by uid,exam_id
order by exam_id,avg_new_score desc						#按照试卷ID升序,评价分降序排序
;

题目2:每份试卷每月作答数和截止当月的作答总数】

select 
	exam_id
	,date_format(start_time,'%Y%m') start_month 
	,count(start_time) month_cnt                  #每份试卷每月作答数
	,sum(count(start_time)) over(partition by exam_id order by date_format(start_time,'%Y%m')) cum_exam_cnt   #每份试卷截止当月的作答总数
from exam_record
group by exam_id,date_format(start_time,'%Y%m')
;

题目3:每月及截止当月的答题情况】

select
	start_month
	,count(distinct uid) 	mau    					#月活用户数
	,sum(new_day) 				month_add_uv		#新增用户数
	,max(sum(new_day)) over(order by start_month)   #截止当月的单月最大新增用户数 
	,sum(sum(new_day)) over(order by start_month)   #截止当月的累积用户数
from(
		select 
			uid
			,exam_id
			,date_format(start_time,'%Y%m') start_month
			,if(new_user_day=start_time,1,0) new_day      #若用户的登录时间等于首次登录时间则定义为新用户,新用户被标记为1,其他标记为0
		from (
				select 
					uid
					,exam_id
					,start_time
					,min(start_time) over(partition by uid) new_user_day   #每一个用户的首次登录日期
				from exam_record
		) a
) a1
group by start_month
order by start_month
;

六、其他常用操作

1. 空值处理

题目1:统计有未完成状态的试卷的未完成数和未完成率0级用户高难度试卷的平均用时和平均得分】

select 
	exam_id
	,sum(if(submit_time is not null,0,1)) incomplete_cnt   #未完成数
	, round(sum(if(submit_time is not null,0,1))/count(start_time),3)  incomplete_rate #未完成率
from exam_record
group by exam_id
having sum(if(submit_time is not null,0,1))>=1
;

题目2:0级用户高难度试卷的平均用时和平均得分】

select 
	uid
	,round(avg(score),0) avg_score      			#考试平均得分
	,round(avg(diff_m),1) avg_time_took				#考试平均用时
from (
		select 
			a.uid
			,c.exam_id
			,ifnull(timestampdiff(minute,b.start_time,b.submit_time),c.duration) diff_m   #未完成的默认试卷最大考试时长
			,ifnull(b.score,0) score     #未完成的默认试卷0分处理
		from (
				select 
					uid
					,level
				from user_info
				where `level`='0'      #每个0级用户
		) a join (
					select 
						uid
						,exam_id
						,start_time
						,submit_time
						,score
					from exam_record
		) b on a.uid=b.uid 
		join (
				select 
					exam_id
					,difficulty
					,duration
				from examination_info
				where difficulty='hard'    #所有的高难度试卷
		) c on b.exam_id=c.exam_id
) a1
group by uid
;

2. 高级条件语句

题目1:筛选限定昵称成就值活跃日期的用户】

select
	a.uid
	,a.nick_name
	,a.achievement
from(
		select 
			uid
			,nick_name
			,achievement
		from user_info
		where nick_name like '牛客%号'							#昵称以『牛客』开头『号』结尾、成就值在1200~2500之间
		and achievement between '1200' and '2500'
) a join (
			select  
				uid
			from (
					select 
						uid
						,start_time
						,max(start_time) over(partition by uid) max_time    #最近一次活跃答题在2021年9月
					from exam_record
			) a
			where substr(max_time,1,7)='2021-09'
			group by uid
			union 
			select  
				uid
			from (
					select 
						uid
						,submit_time
						,max(submit_time) over(partition by uid) max_time    #最近一次活跃作答试卷在2021年9月
					from practice_record
			) a
			where substr(max_time,1,7)='2021-09'
			group by uid
) b on a.uid=b.uid
;

题目2:筛选昵称规则和试卷规则的作答记录】

RLIKE后面可以跟正则表达式
正则表达式" ^ [0-9]+$ "的意思
1、字符^
意义:表示匹配的字符必须在最前边。
例如:^A不匹配“an A”中的‘A’,但匹配“An A”中最前面的‘A’。
2、字符$
意义:与^类似,匹配最末的字符。
例如:t$不匹配“eater”中的‘t’,但匹配“eat”中的‘t’。
3、字符[0-9]
意义:字符列表,匹配列出中的任一个字符。你可以通过连字符-指出字符范围。
例如:[abc]跟[a-c]一样。它们匹配“brisket”中的‘b’和“ache”中的‘c’。
4、字符+
意义:匹配+号前面的字符1次及以上。等价于{1,}。
例如:a+匹配“candy”中的‘a’和“caaaaaaandy”中的所有‘a’。

select 
	b.uid	
	,b.exam_id
	,round(avg(score),0) avg_score
from (
		select 
			uid
			,nick_name
		from user_info
		where (nick_name rlike '^牛客[0-9]+号$') or (nick_name rlike '^[0-9]+$')  #昵称以"牛客"+纯数字+"号"或者纯数字组成
) a join (
			select 
				uid
				,exam_id
				,score
				,submit_time
			from exam_record
			where submit_time is not null         # 已完成的试卷
) b on a.uid=b.uid
join (
		select 
			exam_id
			,tag 
		from examination_info    
		where tag rlike '^[cC]'       # 字母c开头的试卷类别(如C,C++,c#等)
) c on b.exam_id=c.exam_id
group by b.uid,b.exam_id
order by b.uid,avg_score
;

题目3:根据指定记录是否存在输出不同情况】

with t1 as(
			select 
				a.uid
				,a.level
				,count(start_time)                 			total_cnt													#每个用户的答题数
				,count(start_time)-count(submit_time)  	incomplete_cnt    								#每个用户的未完成数
				,ifnull(round((count(start_time)-count(submit_time))/count(start_time),3),0)  incomplete_rate  #每个用户的未完成率
			from (
					select 
						uid
						,level
					from user_info
			) a left join (
							select 
								uid
								,exam_id
								,start_time
								,submit_time
							from exam_record
			) b on a.uid=b.uid
			group by a.uid,a.level
) #命名为t1的子查询

select 
	uid	
	,incomplete_cnt	
	,incomplete_rate
from t1
where exists(select uid from t1 where level='0' and incomplete_cnt>2)     #出现level=0且存在incomplete_cnt>2时
and level='0' 																														#输出level=0的用户未完成数和未完成率
union all 
select 
	uid	
	,incomplete_cnt	
	,incomplete_rate
from t1
where not exists(select uid from t1 where level='0' and incomplete_cnt>2) #没出现level=0且存在incomplete_cnt>2时
and total_cnt>=1																													#输出所有有作答记录的用户的未完成数和未完成率
order by incomplete_rate
;

题目4:各用户等级的不同得分表现占比】

select 
	level
	,score_grade
	,round(count(score_grade)/total_cnt,3)  ratio # 各得分等级占比
from (
		select
			a.uid
			,b.exam_id
			,b.submit_time
			,a.level
			,b.score_grade
			,count(b.score_grade) over(partition by a.level) total_cnt  #各等级完成试卷总数
		from ( 
				select 
					uid
					,level 
				from user_info
		) a join (
					select 
						uid
						,exam_id
						,case when score>=90 then '优'
							  when score>=75 then '良'
							  when score>=60 then '中'
							  when score<60  then '差' 
							  else null end score_grade   # 优良中差四个得分等级
						,submit_time
						,score
					from exam_record
		) b on a.uid=b.uid	
		where submit_time is not null		        				# 在完成过的试卷中
) a1
group by level,score_grade
order by level desc,ratio desc
;

3. 限量查询

题目1:注册时间最早的三个人】

select 
	uid	
	,nick_name	
	,register_time	
from (
		select 
			uid	
			,nick_name	
			,register_time
			,row_number() over(order by register_time) rn
		from user_info
) a
where rn<=3
;

题目2:注册当天就完成了试卷的名单三页】

select 
	uid
	,level
	,register_time 
	,max_score
from (
		select 
			b.uid
			,b.exam_id
			,b.submit_time
			,a.register_time
			,a.level
			,b.score
			,d.max_score
		from (
				select 
					uid
					,level
					,job
					, register_time
				from user_info
				where job='算法'					#求职方向为算法工程师
		) a join (
					select 
						uid
						,exam_id
						,submit_time
						,score
					from exam_record
		) b on a.uid=b.uid
		join (
				select 
					exam_id
					,tag
				from examination_info
				where tag='算法'					#算法类试卷
		) c on b.exam_id=c.exam_id
		join (
				select 
					uid
					,max(score)  max_score		#参加过的所有考试最高得分
				from exam_record
				group by uid
		) d on a.uid=d.uid
		where date(a.register_time)=date(b.submit_time)   #注册当天就完成了算法类试卷
) a1
order by max_score desc
LIMIT 6,3												  #展示7-9行
;

4. 文本转换函数

题目1:修复串列了的记录】

select 
	exam_id	
	,substring_index(tag,',',1) tag	
	,substring_index(substring_index(tag,',',2),',',-1) difficulty	
	,substring_index(tag,',',-1) duration
from examination_info
where tag like '%,%'
;

题目2:对过长的昵称截取处理】

获取字符串长度
LENTH:获取字符串的字节数量,一个中文字符通常不止占用一个字节
CHAR_LENTH:获取字符数量,不管汉字、字母、数字都算一个字符

select 
	uid	
	,if(char_length(nick_name)>13,concat(substr(nick_name,1,10),'...'),nick_name) nick_name
from user_info
where char_length(nick_name)>10
;

题目3:大小写混乱时的筛选统计】

with t_tag_count as (
						select 
							tag
							,count(start_time) answer_cnt
						from (
										select 
											exam_id
											,tag
										from examination_info
						) a join (
												select 
													exam_id
													,start_time
												from exam_record
						) b on a.exam_id=b.exam_id
						group by tag
) 	
							
select 
	a.tag
	,b.answer_cnt
from t_tag_count  a
join t_tag_count  b
on upper(a.tag) = b.tag and a.tag != b.tag and a.answer_cnt < 3
;				

文章来源:https://blog.csdn.net/myy12456789/article/details/135071291
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。