图书馆数据仓库

打印 上一主题 下一主题

主题 556|帖子 556|积分 1668

目次
1.数据仓库的数据来源为业务数据库(mysql)
       初始化脚本
   init_book_result.sql
2.通过sqoop将mysql中的业务数据导入到大数据平台(hive)
 导入mysql数据到hive中
3.通过hive举行数据计算和数据分析 形成数据报表
4.再通过sqoop将数据报表导出到mysql 
5.使用FineReport制作数据报表


1.数据仓库的数据来源为业务数据库(mysql)

        包含 图书表 t_book_info,
                借书表 t_borrow_info,  
                用户表 t_user_info  38条数据
                图书类别表 dim_books_type 5条
       初始化脚本

        init_mysql.sql
        
  1. -- 设置sql_mode
  2. set sql_mode = 'NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES';
  3. -- 创建数据库library
  4. create database library;
  5. -- 切换数据库
  6. use library;
  7. -- 创建用户信息表
  8. CREATE TABLE t_user_info(
  9.         user_id        varchar(100) not null,
  10.         user_name      varchar(100) not null,
  11.         sex            varchar(10)  not null,
  12.         age            int not null
  13. )DEFAULT CHARSET='utf8';
  14. -- 创建图书表
  15. CREATE TABLE t_book_info(
  16.         book_id        varchar(100) not null,
  17.         book_name            varchar(100) not null,
  18.         type_id        varchar(100) not null
  19. )DEFAULT CHARSET='utf8';
  20. -- 创建图书类别表
  21. CREATE TABLE dim_books_type(
  22.         type_id    varchar(100) not null,
  23.         type_name  varchar(100) not null
  24. )DEFAULT CHARSET='utf8';
  25. -- 创建借书表
  26. CREATE TABLE t_borrow_order(
  27.         user_id        varchar(100) not null,
  28.         book_id   varchar(100) not null,
  29.         create_time    varchar(100) not null
  30. )DEFAULT CHARSET='utf8';
  31. -- 用户信息表插入数据
  32. insert into t_user_info values ('114', '王小名', '男', 22);
  33. insert into t_user_info values ('115', '张美丽', '女', 27);
  34. insert into t_user_info values ('116', '李华', '男', 30);
  35. insert into t_user_info values ('117', '陈晓红', '女', 35);
  36. insert into t_user_info values ('118', '赵伟', '男', 24);
  37. insert into t_user_info values ('119', '周小燕', '女', 29);
  38. insert into t_user_info values ('120', '吴强', '男', 33);
  39. insert into t_user_info values ('121', '郑丽', '女', 26);
  40. insert into t_user_info values ('122', '王刚', '男', 28);
  41. insert into t_user_info values ('123', '黄芳', '女', 31);
  42. insert into t_user_info values ('124', '刘涛', '男', 36);
  43. insert into t_user_info values ('125', '杨静', '女', 25);
  44. insert into t_user_info values ('126', '张杰', '男', 32);
  45. insert into t_user_info values ('127', '陈莉', '女', 23);
  46. insert into t_user_info values ('128', '赵勇', '男', 38);
  47. insert into t_user_info values ('129', '周敏', '女', 30);
  48. insert into t_user_info values ('130', '吴浩', '男', 29);
  49. insert into t_user_info values ('131', '郑薇', '女', 34);
  50. insert into t_user_info values ('132', '王林', '男', 27);
  51. insert into t_user_info values ('133', '黄梅', '女', 26);
  52. insert into t_user_info values ('134', '刘军', '男', 37);
  53. insert into t_user_info values ('135', '杨慧', '女', 25);
  54. insert into t_user_info values ('136', '张明', '男', 33);
  55. insert into t_user_info values ('137', '陈霞', '女', 31);
  56. insert into t_user_info values ('138', '赵海', '男', 28);
  57. insert into t_user_info values ('139', '周兰', '女', 36);
  58. insert into t_user_info values ('140', '吴飞', '男', 24);
  59. insert into t_user_info values ('141', '郑敏', '女', 27);
  60. insert into t_user_info values ('142', '王磊', '男', 35);
  61. insert into t_user_info values ('143', '黄蓉', '女', 30);
  62. insert into t_user_info values ('144', '刘丽', '女', 26);
  63. insert into t_user_info values ('145', '杨勇', '男', 32);
  64. insert into t_user_info values ('146', '张华', '男', 29);
  65. insert into t_user_info values ('147', '陈敏', '女', 34);
  66. insert into t_user_info values ('148', '赵丽', '女', 31);
  67. insert into t_user_info values ('149', '周刚', '男', 25);
  68. insert into t_user_info values ('150', '吴丽', '女', 37);
  69. insert into t_user_info values ('151', '郑强', '男', 28);
  70. insert into t_user_info values ('152', '王芳', '女', 33);
  71. insert into t_user_info values ('153', '黄伟', '男', 27);
  72. insert into t_user_info values ('154', '刘静', '女', 36);
  73. insert into t_user_info values ('155', '杨华', '男', 24);
  74. insert into t_user_info values ('156', '张敏', '女', 31);
  75. insert into t_user_info values ('157', '陈军', '男', 30);
  76. insert into t_user_info values ('158', '赵敏', '女', 26);
  77. insert into t_user_info values ('159', '周强', '男', 35);
  78. insert into t_user_info values ('160', '吴红', '女', 32);
  79. insert into t_user_info values ('161', '郑丽', '女', 29);
  80. insert into t_user_info values ('162', '王伟', '男', 34);
  81. insert into t_user_info values ('163', '黄静', '女', 31);
  82. insert into t_user_info values ('164', '刘涛', '男', 25);
  83. insert into t_user_info values ('165', '杨敏', '女', 37);
  84. insert into t_user_info values ('166', '张勇', '男', 28);
  85. insert into t_user_info values ('167', '陈芳', '女', 33);
  86. insert into t_user_info values ('168', '赵军', '男', 27);
  87. insert into t_user_info values ('169', '周丽', '女', 36);
  88. insert into t_user_info values ('170', '吴华', '男', 24);
  89. commit;
  90. -- 图书表插入数据
  91. -- Book Type: 散文随笔 (a)
  92. insert into t_book_info values ('a001', '珠江潮汐美', 'a');
  93. insert into t_book_info values ('a002', '晨曦的低语', 'a');
  94. insert into t_book_info values ('a003', '山间小路', 'a');
  95. insert into t_book_info values ('a004', '秋叶之歌', 'a');
  96. insert into t_book_info values ('a005', '夜色中的琴声', 'a');
  97. insert into t_book_info values ('a006', '城市边缘的诗人', 'a');
  98. insert into t_book_info values ('a007', '冬日里的暖阳', 'a');
  99. insert into t_book_info values ('a008', '海边的沉思', 'a');
  100. -- book type: 世界名著 (b)
  101. insert into t_book_info values ('b001', '悲惨世界', 'b');
  102. insert into t_book_info values ('b002', '百年孤独', 'b');
  103. insert into t_book_info values ('b003', '双城记', 'b');
  104. insert into t_book_info values ('b004', '战争与和平', 'b');
  105. insert into t_book_info values ('b005', '简爱', 'b');
  106. insert into t_book_info values ('b006', '飘', 'b');
  107. insert into t_book_info values ('b007', '堂吉诃德', 'b');
  108. insert into t_book_info values ('b008', '呼啸山庄', 'b');
  109. -- book type: 少儿童书 (c)
  110. insert into t_book_info values ('c001', '小王子的星球', 'c');
  111. insert into t_book_info values ('c002', '魔法森林的秘密', 'c');
  112. insert into t_book_info values ('c003', '海底两万里', 'c');
  113. insert into t_book_info values ('c004', '勇敢的小火车头', 'c');
  114. insert into t_book_info values ('c005', '神奇的种子', 'c');
  115. insert into t_book_info values ('c006', '月亮上的兔子', 'c');
  116. insert into t_book_info values ('c007', '彩虹桥下的秘密', 'c');
  117. insert into t_book_info values ('c008', '会说话的石头', 'c');
  118. -- book type: 历史小说 (d)
  119. insert into t_book_info values ('d001', '三国演义', 'd');
  120. insert into t_book_info values ('d002', '水浒传', 'd');
  121. insert into t_book_info values ('d003', '大明王朝', 'd');
  122. insert into t_book_info values ('d004', '清朝末年', 'd');
  123. insert into t_book_info values ('d005', '大唐盛世', 'd');
  124. insert into t_book_info values ('d006', '宋朝风云', 'd');
  125. insert into t_book_info values ('d007', '明朝那些事儿', 'd');
  126. insert into t_book_info values ('d008', '清朝宫廷秘史', 'd');
  127. -- book type: 国学入门 (e)
  128. insert into t_book_info values ('e001', '论语解读', 'e');
  129. insert into t_book_info values ('e002', '道德经注释', 'e');
  130. insert into t_book_info values ('e003', '易经初探', 'e');
  131. insert into t_book_info values ('e004', '诗经选读', 'e');
  132. insert into t_book_info values ('e005', '孟子精讲', 'e');
  133. insert into t_book_info values ('e006', '庄子心解', 'e');
  134. insert into t_book_info values ('e007', '大学中庸', 'e');
  135. insert into t_book_info values ('e008', '孝经新解', 'e');
  136. commit;
  137. -- 图书类别表插入数据
  138. insert into dim_books_type values('a','散文随笔');
  139. insert into dim_books_type values('b','世界名著');
  140. insert into dim_books_type values('c','少儿童书');
  141. insert into dim_books_type values('d','历史小说');
  142. insert into dim_books_type values('e','国学入门');
  143. commit;
  144. -- 借书表插入数据
  145. insert into t_borrow_order values('114','a002','2022-11-08 09:23:54');
  146. insert into t_borrow_order values('115','e002','2022-11-08 09:23:54');
  147. insert into t_borrow_order values('114','b003','2022-11-08 09:23:54');
  148. insert into t_borrow_order values('116','d002','2022-11-08 09:23:54');
  149. insert into t_borrow_order values('114','c001','2022-11-08 09:23:54');
  150. insert into t_borrow_order values('115','a005','2022-11-08 09:23:54');
  151. insert into t_borrow_order values('117','b004','2022-11-08 09:23:54');
  152. insert into t_borrow_order values('118','a007','2022-11-08 09:23:54');
  153. insert into t_borrow_order values('118','a004','2022-11-08 09:23:54');
  154. insert into t_borrow_order values('119','e003','2022-11-08 09:23:54');
  155. insert into t_borrow_order values('119','d001','2022-11-08 09:23:54');
  156. insert into t_borrow_order values('120','a002','2022-11-08 09:23:54');
  157. insert into t_borrow_order values('120','a004','2022-11-08 09:23:54');
  158. insert into t_borrow_order values('121','d005','2022-11-08 09:23:54');
  159. insert into t_borrow_order values('123','b006','2022-11-08 09:23:54');
  160. insert into t_borrow_order values('124','a002','2022-11-08 09:23:54');
  161. insert into t_borrow_order values('125','e004','2022-11-08 09:23:54');
  162. insert into t_borrow_order values('126','b002','2022-11-08 09:23:54');
  163. insert into t_borrow_order values('127','a003','2022-11-08 09:23:54');
  164. insert into t_borrow_order values('124','d002','2022-11-08 09:23:54');
  165. insert into t_borrow_order values('122','b001','2022-11-08 09:23:54');
  166. insert into t_borrow_order values('128','a001','2022-11-08 09:23:54');
  167. insert into t_borrow_order values('129','a006','2022-11-08 09:23:54');
  168. insert into t_borrow_order values('125','d008','2022-11-08 09:23:54');
  169. insert into t_borrow_order values('123','e008','2022-11-08 09:23:54');
  170. insert into t_borrow_order values('120','a005','2022-11-08 09:23:54');
  171. insert into t_borrow_order values('130','b007','2022-11-08 09:23:54');
  172. insert into t_borrow_order values('131','a007','2022-11-08 09:23:54');
  173. insert into t_borrow_order values('132','a008','2022-11-08 09:23:54');
  174. insert into t_borrow_order values('133','e004','2022-11-08 09:23:54');
  175. insert into t_borrow_order values('135','a004','2022-11-08 09:23:54');
  176. insert into t_borrow_order values('133','d003','2022-11-08 09:23:54');
  177. insert into t_borrow_order values('136','a003','2022-11-08 09:23:54');
  178. insert into t_borrow_order values('138','a004','2022-11-08 09:23:54');
  179. insert into t_borrow_order values('139','d001','2022-11-08 09:23:54');
  180. insert into t_borrow_order values('133','e001','2022-11-08 09:23:54');
  181. insert into t_borrow_order values('133','a002','2022-11-08 09:23:54');
  182. insert into t_borrow_order values('132','e002','2022-11-08 09:23:54');
  183. insert into t_borrow_order values('131','a003','2022-11-08 09:23:54');
  184. insert into t_borrow_order values('141','b003','2022-11-08 09:23:54');
  185. insert into t_borrow_order values('140','e003','2022-11-08 09:23:54');
  186. insert into t_borrow_order values('142','a005','2022-11-08 09:23:54');
  187. insert into t_borrow_order values('142','d005','2022-11-08 09:23:54');
  188. insert into t_borrow_order values('146','b006','2022-11-08 09:23:54');
  189. insert into t_borrow_order values('144','d006','2022-11-08 09:23:54');
  190. insert into t_borrow_order values('148','d007','2022-11-08 09:23:54');
  191. insert into t_borrow_order values('144','e003','2022-11-08 09:23:54');
  192. insert into t_borrow_order values('142','a003','2022-11-08 09:23:54');
  193. insert into t_borrow_order values('143','e003','2022-11-08 09:23:54');
  194. insert into t_borrow_order values('149','d004','2022-11-08 09:23:54');
  195. insert into t_borrow_order values('150','a005','2022-11-08 09:23:54');
  196. insert into t_borrow_order values('151','a005','2022-11-08 09:23:54');
  197. insert into t_borrow_order values('151','a002','2022-11-08 09:23:54');
  198. insert into t_borrow_order values('154','d003','2022-11-08 09:23:54');
  199. insert into t_borrow_order values('153','d002','2022-11-08 09:23:54');
  200. insert into t_borrow_order values('156','a002','2022-11-08 09:23:54');
  201. insert into t_borrow_order values('155','d003','2022-11-08 09:23:54');
  202. insert into t_borrow_order values('157','a004','2022-11-08 09:23:54');
  203. insert into t_borrow_order values('158','d005','2022-11-08 09:23:54');
  204. insert into t_borrow_order values('159','a005','2022-11-08 09:23:54');
  205. insert into t_borrow_order values('154','c006','2022-11-08 09:23:54');
  206. insert into t_borrow_order values('153','d007','2022-11-08 09:23:54');
  207. insert into t_borrow_order values('152','c004','2022-11-08 09:23:54');
  208. insert into t_borrow_order values('154','a004','2022-11-08 09:23:54');
  209. insert into t_borrow_order values('151','d003','2022-11-08 09:23:54');
  210. insert into t_borrow_order values('152','a002','2022-11-08 09:23:54');
  211. insert into t_borrow_order values('162','c003','2022-11-08 09:23:54');
  212. insert into t_borrow_order values('161','a001','2022-11-08 09:23:54');
  213. insert into t_borrow_order values('166','d002','2022-11-08 09:23:54');
  214. insert into t_borrow_order values('163','a002','2022-11-08 09:23:54');
  215. insert into t_borrow_order values('167','c003','2022-11-08 09:23:54');
  216. insert into t_borrow_order values('169','a005','2022-11-08 09:23:54');
  217. commit;
复制代码
   init_book_result.sql

  1. -- 设置sql_mode
  2. set sql_mode = 'NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES';
  3. -- 创建数据库result,并进行切换
  4. create database book_result;
  5. use book_result;
  6. -- 创建图书类别浏览量表
  7. CREATE TABLE t_books_type_borrow_count(
  8.         type_name        varchar(100) not null,
  9.         borrow_count        int not null
  10. )DEFAULT CHARSET='utf8';
复制代码
2.通过sqoop将mysql中的业务数据导入到大数据平台(hive)

在hive中创建映射的数据库
init_hive.sql
  1. create database library;
  2. use library;
  3. -- 创建用户信息表
  4. create table t_user_info (
  5.     user_id STRING    comment "用户id",   
  6.     user_name STRING  comment "用户姓名",
  7.     sex STRING        comment "用户姓名",      
  8.     age INT           comment "用户性别"
  9. )
  10. comment "用户信息表"
  11. row format delimited fields terminated by ','
  12. stored as textfile;
  13. -- 创建图书表
  14. create table t_book_info (
  15.     book_id STRING    comment "图书id",   
  16.     book_name STRING  comment "书名",
  17.     type_id STRING    comment "类别id"
  18. )
  19. comment "图书表"
  20. row format delimited fields terminated by ','
  21. stored as textfile;
  22. -- 创建图书类别表
  23. create table dim_books_type (
  24.     type_id STRING    comment "类别id",   
  25.     type_name STRING  comment "类别名"
  26. )
  27. comment "图书类别表"
  28. row format delimited fields terminated by ','
  29. stored as textfile;
  30. -- 创建借书表
  31. create table t_borrow_order (
  32.     user_id STRING     comment "用户id",   
  33.     book_id STRING     comment "图书id",   
  34.     create_time STRING comment "创建时间"
  35. )
  36. comment "借书表"
  37. row format delimited fields terminated by ','
  38. stored as textfile;
复制代码
source /opt/sql/library/init_hive.sql;

 导入mysql数据到hive中

此次mysql与hive中的表名都雷同
sqoop import \
                                     虚拟机      端口号 mysql中的数据库名
--connect jdbc:mysql://bigdata004:3306/mall \  
--username root \
--password root123 \
           mysql中的表名
--table t_user_info \
--num-mappers 1 \
--hive-import \
--fields-terminated-by "," \
--hive-overwrite \
导入dim_books_type
  1. sqoop import \
  2. --connect jdbc:mysql://bigdata004:3306/library \
  3. --username root \
  4. --password root123 \
  5. --table dim_books_type \
  6. --num-mappers 1 \
  7. --hive-import \
  8. --fields-terminated-by "," \
  9. --hive-overwrite \
  10. --hive-table library.dim_books_type
复制代码
3.通过hive举行数据计算和数据分析 形成数据报表

  1. --切换数据库
  2. use library;
  3. --创建图书类别借阅表
  4. --从借书表中获取图书id
  5. --从图书表中获取图书id的类别id
  6. --从类别表中获取类别名
  7. --表中显示 类名 和 该类名的总数()
  8. create table if not exists library.dws_borrow_books_type_count
  9. as
  10.         select t3.type_name,count(t2.type_id) as borrow_count from (select book_id from t_borrow_order) t1
  11.         inner join t_book_info t2 on t1.book_id=t2.book_id
  12.         inner join dim_books_type t3 on t2.type_id=t3.type_id
  13.         group by t3.type_name;
复制代码
4.再通过sqoop将数据报表导出到mysql 

-- sqoop导出数据到mysql
sqoop export \
                                   主机名                 数据库
--connect jdbc:mysql://bigdata004:3306/result \
--username root \
--password root123 \
                    mysql上的表名
--table  t_city_sale_total \
--num-mappers 1 \
                    hdfs上的数据库位置
--export-dir /user/hive/warehouse/mall_bigdata.db/dws_sale_order_city_total \
--input-fields-terminated-by  "\001"
  1. -- sqoop导出数据到mysql
  2. sqoop export \
  3. --connect jdbc:mysql://bigdata004:3306/book_result \
  4. --username root \
  5. --password root123 \
  6. --table t_books_type_borrow_count \
  7. --num-mappers 1 \
  8. --export-dir /user/hive/warehouse/library.db/dws_borrow_books_type_count \
  9. --input-fields-terminated-by "\001"
复制代码

5.使用FineReport制作数据报表




免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。
回复

使用道具 举报

0 个回复

倒序浏览

快速回复

您需要登录后才可以回帖 登录 or 立即注册

本版积分规则

熊熊出没

金牌会员
这个人很懒什么都没写!

标签云

快速回复 返回顶部 返回列表