SQL join语法案例
Data:- order.txt
- order011,u001,300
- order012,u002,200
- order023,u006,100
- order056,u007,300
- order066,u003,500
- order055,u004,300
- order021,u005,300
- order014,u001,100
- order025,u005,300
- order046,u007,30
- order067,u003,340
- order098,u008,310
- user.txt
- u001,hls,22,fengjie
- u002,wangwu,31,lisi
- u003,zhangyanru,22,tananpengyou
- u004,laocao,26,fengyi
- u005,mengqi,12,nvmengqi
- u006,haolei,38,sb
- u007,wanghongjing,24,wife
- u009,wanghongjing,24,wife
- 返回一个结果:order011 u001 300 hls 22 fengjie
复制代码 代码示例:- package com.doit.day03
- import scala.io.{BufferedSource, Source}
/**
 * Demo of SQL-style joins between two comma-separated text files.
 *
 * `user.txt` rows look like `u001,hls,22,fengjie`; `order.txt` rows look like
 * `order011,u001,300`, where the second field is the user id used as the join key.
 *
 * Both join variants are kept commented out, so running `main` only opens and then closes
 * the two input files. Both variants read from the same two sources, so enable one at a time.
 */
object JoinDemo {
  def main(args: Array[String]): Unit = {
    // u001,hls,22,fengjie
    val bs1: BufferedSource = Source.fromFile("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\user.txt")
    try {
      // order011,u001,300
      val bs2: BufferedSource = Source.fromFile("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\order.txt")
      try {
        // Variant 1: left join driven by the orders. Every order is kept; when no user
        // matches the order's user id, the user fields are filled with the string "null".
        /*
        // Turn the user data into a map keyed by user id.
        val users: Iterator[String] = bs1.getLines()
        val iters: Iterator[(String, (String, String, String, String))] = users.map(_.split(",", -1)).map(x => (x(0), (x(0), x(1), x(2), x(3))))
        val map: Map[String, (String, String, String, String)] = iters.toMap
        // Turn the order data into a list of (user id, (order id, user id)).
        val orders: Iterator[String] = bs2.getLines()
        val iters2: Iterator[(String, (String, String))] = orders.map(_.split(",", -1)).map(x => (x(1), (x(0), x(1))))
        val list2: List[(String, (String, String))] = iters2.toList
        // Walk over every order and attach the user's fields.
        val r = list2.map(x => {
          val user = map.getOrElse(x._1, ("null", "null", "null", "null"))
          (user._1, user._2, user._3, user._4, x._2._1)
        })
        // Print the result.
        r.sortBy(_._1).foreach(println)
        */

        // Variant 2: inner join via a nested loop; only matching user/order pairs are printed.
        // The lines are read directly from bs1/bs2 and materialised with toList so this
        // variant compiles on its own, without the definitions from variant 1.
        /*
        val userTuple: List[(String, String, String, String)] = bs1.getLines().toList.map(line => {
          val arr: Array[String] = line.split(",")
          // user_id, user_name, age, name
          (arr(0), arr(1), arr(2), arr(3))
        })
        val orderTuple: List[(String, String, String)] = bs2.getLines().toList.map(line => {
          val arr: Array[String] = line.split(",")
          // order_id, user_id, amount
          (arr(0), arr(1), arr(2))
        })
        // Join condition: user.user_id == order.user_id.
        for (user <- userTuple) {
          for (order <- orderTuple) {
            if (user._1 == order._2) {
              println((user._1, user._2, user._3, user._4, order._1, order._3))
            }
          }
        }
        */
      } finally {
        // Release the order file even if a join variant throws.
        bs2.close()
      }
    } finally {
      // Release the user file even if opening the order file fails.
      bs1.close()
    }
  }
}
复制代码 PV/UV 统计案例(原标题为“线段重叠案例”,与下文的数据和代码内容不符)
data:- site1,user1,2018-03-01 02:12:22
- site1,user2,2018-03-05 04:12:22
- site1,user2,2018-03-05 04:13:22
- site1,user2,2018-03-05 04:14:22
- site1,user2,2018-03-05 04:15:22
- site4,user7,
- site1,user2,2018-03-05 05:15:22
- site1,user2,2018-03-05 08:15:22
- site1,user3,2018-03-05 04:15:22
- site1,user4,2018-03-05 05:15:22
- site1,user3,2018-03-07 11:12:22
- site1,user3,2018-03-08 11:12:22
- site2,user4,2018-03-07 15:12:22
- site3,user5,2018-03-07 08:12:22
- site3,user6,2018-03-05 08:12:22
- site1,user1,2018-03-08 11:12:22
- site1,,2018-03-08 11:12:22
- site2,user2,2018-03-07 15:12:22
- site3,user5,2018-03-07 08:12:22
- site3,user5,2018-03-07 18:12:22
- site3,user6,2018-03-05 08:12:22
- site4,user7,2018-03-03 10:12:22
- site2,,2018-03-08 11:12:22
- site3,user5,2018-03-07 08:12:22
- site3,user6,2018-03-05 08:12:22
- site4,user5,2018-03-03 10:12:22
- site4,user7,2018-02-20 11:12:22
复制代码 代码:- package com.doit.day03
- import scala.io.{BufferedSource, Source}
/**
 * Computes page views (PV) and unique visitors (UV) from an access log.
 *
 * Each input line is expected to look like `site1,user1,2018-03-01 02:12:22`
 * (site, user, timestamp). Results are grouped by (date, site):
 *
 *  - PV: number of visits recorded for that date and site.
 *  - UV: number of distinct users recorded for that date and site.
 *
 * Lines that are not well formed (fewer than three fields, any empty field, or a timestamp
 * too short to contain a `yyyy-MM-dd` date) are skipped instead of crashing the run.
 */
object PVUVDemo {

  /** Minimum number of comma-separated fields: site, user, timestamp. */
  private val RequiredFields = 3

  /** Number of leading timestamp characters that form the `yyyy-MM-dd` date. */
  private val DateLength = 10

  /**
   * Parses one log line into `(site, user, date)`.
   *
   * @param line raw line, e.g. `site1,user1,2018-03-01 02:12:22`
   * @return `Some((site, user, date))` for a well-formed line, `None` for dirty data
   */
  private def parseEvent(line: String): Option[(String, String, String)] = {
    // limit -1 keeps trailing empty fields, so "site4,user7," is seen as having an empty timestamp.
    val fields: Array[String] = line.split(",", -1)
    val wellFormed =
      fields.length >= RequiredFields &&
        fields.forall(_.nonEmpty) &&
        fields(2).length >= DateLength
    if (wellFormed) Some((fields(0), fields(1), fields(2).substring(0, DateLength)))
    else None
  }

  def main(args: Array[String]): Unit = {
    val source: BufferedSource = Source.fromFile("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\pvuv.txt")
    // Materialise the lines before closing: getLines() is lazy and reads from the open file.
    val lines: List[String] =
      try source.getLines().toList
      finally source.close()

    // Drop dirty data and keep (site, user, date) for every valid line.
    val events: List[(String, String, String)] = lines.flatMap(line => parseEvent(line))

    // Key every visit by (date, site) and keep the user as the value.
    val tuples: List[((String, String), String)] = events.map(tp => ((tp._3, tp._1), tp._2))
    // Group once; both metrics are derived from the same grouping.
    val grouped: Map[(String, String), List[((String, String), String)]] = tuples.groupBy(_._1)

    // pv: how many times the site was visited on that date.
    val pv: Map[(String, String), Int] = grouped.map(tp => (tp._1, tp._2.size))
    // uv: how many distinct users visited the site on that date.
    val uv: Map[(String, String), Int] = grouped.map(tp => (tp._1, tp._2.distinct.size))

    println("============pv================")
    pv.foreach(println)
    println("============uv================")
    uv.foreach(println)
  }
}
复制代码 免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作! |