yum clean all
yum makecache
yum -y update
2.下载python3
yum install python3
3.查找Hadoop Streaming工具
cd /export/server/hadoop-3.2.2/share/hadoop/tools/lib
ls
4.对文本内容进行统计
创建文件夹命令:mkdir /WordCountTask
# Create the mapper at the exact path the streaming job later ships with -file.
vi /WordCountTask/mapper.py
进入后:
#!/usr/bin/python3
"""Hadoop Streaming mapper that filters log lines and keeps selected fields.

Each input line is split on whitespace. A line is kept only when it has
exactly 15 fields and field 11 equals "200" (presumably the HTTP status
code -- confirm against the log format). For every kept line, fields
0, 1, 2, 3, 6, 8 and 11 are written to stdout separated by single spaces.

The interpreter is python3: under python2 ``print(a, b, ...)`` would emit
a tuple repr instead of space-separated values.
"""
import sys

# Indexes of the whitespace-separated fields copied to the output.
KEPT_FIELDS = (0, 1, 2, 3, 6, 8, 11)


def select_fields(lines):
    """Yield the space-joined kept fields of every matching line.

    lines: iterable of text lines (e.g. ``sys.stdin``).
    """
    for line in lines:
        fields = line.split()
        if len(fields) == 15 and fields[11] == "200":
            yield " ".join(fields[i] for i in KEPT_FIELDS)


if __name__ == "__main__":
    for record in select_fields(sys.stdin):
        print(record)
5.运行第3题脚本
# Root of the Hadoop installation used by the command below.
export HADOOP_HOME=/export/server/hadoop-3.2.2
# Run the streaming job with mapper.py as the mapper.
# -file ships the local script with the job; -mapper then names the shipped
# copy by its file name so the tasks do not depend on /WordCountTask
# existing on the worker nodes.
hadoop jar "${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-3.2.2.jar" \
  -input /web/nginx/log/ \
  -output /web/nginx/result1 \
  -file /WordCountTask/mapper.py \
  -mapper mapper.py
6.运行第4题脚本
进WordCountTask里
# Create the script inside /WordCountTask (not in /), where the job expects it.
vi /WordCountTask/m1.py
进入后:
#!/usr/bin/python3
"""Hadoop Streaming mapper that emits field 8 of every matching log line.

Each input line is split on whitespace. When the line has exactly 15
fields and field 11 equals "200" (presumably the HTTP status code --
confirm against the log format), field 8 is written to stdout on its own
line.
"""
import sys


def extract_field8(lines):
    """Yield field 8 of every line that has 15 fields and "200" in field 11.

    lines: iterable of text lines (e.g. ``sys.stdin``).
    """
    for line in lines:
        fields = line.split()
        if len(fields) == 15 and fields[11] == "200":
            yield fields[8]


if __name__ == "__main__":
    for value in extract_field8(sys.stdin):
        print(value)
编写下一个
# Create the script inside /WordCountTask (not in /), where the job expects it.
vi /WordCountTask/m2.py
进入后:
#!/usr/bin/python3
"""Hadoop Streaming reducer that counts how often each input line occurs.

Reads every line from stdin, counts identical lines, and prints one
"<line> <count>" row per distinct line. Newline and tab characters are
removed from the line text before it is printed.

The interpreter is python3: under python2 ``print(a, b)`` would emit a
tuple repr instead of "a b".
"""
import sys
from collections import Counter


def tally(lines):
    """Yield "<text> <count>" for each distinct line in ``lines``.

    Lines are counted exactly as received; tabs and newlines are stripped
    only from the text that is emitted.
    """
    for line, count in Counter(lines).items():
        text = line.replace('\n', '').replace('\t', '')
        yield "{} {}".format(text, count)


if __name__ == "__main__":
    # The original iterated over the undefined name `sys_stdin`.
    for row in tally(sys.stdin):
        print(row)
7.第4题运行
# Run the counting job: m1.py is the mapper and m2.py is the reducer.
# Both scripts are shipped with -file and referenced by file name.
# The output goes to result2 because Hadoop refuses to write into an
# output directory that already exists (result1 is used by the first job).
hadoop jar /export/server/hadoop-3.2.2/share/hadoop/tools/lib/hadoop-streaming-3.2.2.jar \
  -input /web/nginx/log/ \
  -output /web/nginx/result2 \
  -file /WordCountTask/m1.py \
  -mapper m1.py \
  -file /WordCountTask/m2.py \
  -reducer m2.py