Network Security Internet Technology Development Database Servers Mobile Phone Android Software Apple Software Computer Software News IT Information

In addition to Weibo, there is also WeChat

Please pay attention

WeChat public account

Shulou

An example shell script for Hadoop MapReduce log statistics analysis

2025-01-27 Update From: SLTechnology News&Howtos shulou NAV: SLTechnology News&Howtos > Internet Technology >

Share

Shulou(Shulou.com)06/03 Report--

#!/bin/sh

############################
# split today and yesterday
# Append ten blank lines to the log so this run's entries are visually
# separated from the previous run's.
for i in $(seq 10); do
  echo "" >> /u1/hadoop-stat/stat.log
done

# Mark the start of the run, tagged with yesterday's date (GNU date -d).
echo "begin [$(date "+%Y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log

# #

# remove file

#######################################
# Delete every entry directly under a directory whose name is not the
# month (YYYY-MM) of yesterday's date.
# Arguments: $1 - directory to clean
# Outputs:   an error message on stderr when $1 is not usable
# Returns:   1 if $1 is empty / not a directory or the month cannot be
#            computed; 0 otherwise
#######################################
removeFilepathNotCurrentMonth() {
  # Refuse an empty or missing directory: with an empty $1 the path built
  # below would be "/<name>" and rm -rf would run against the root.
  if [ -z "${1:-}" ] || [ ! -d "$1" ]; then
    echo "removeFilepathNotCurrentMonth: not a directory: '${1:-}'" >&2
    return 1
  fi

  # Month of yesterday's date (GNU date -d). An empty value would make every
  # entry look stale, so bail out instead of deleting everything.
  keep_month=$(date "+%Y-%m" -d "-1 days") || return 1
  [ -n "$keep_month" ] || return 1

  # Glob instead of parsing ls, so names containing spaces are handled.
  for entry in "$1"/*; do
    # An unmatched glob stays literal (empty directory); skip it.
    [ -e "$entry" ] || [ -L "$entry" ] || continue
    if [ "${entry##*/}" != "$keep_month" ]; then
      rm -rf -- "$entry"
    fi
  done
}

# Local staging directories for the downloaded logs.
GYLOG_PATH="/u1/hadoop-stat/gylog"
NGINXLOG_PATH="/u1/hadoop-stat/nginxlog"

# Drop everything under each staging directory that does not belong to the
# month of yesterday's date.
echo "begin remove gylogpath's files not in current month" >> /u1/hadoop-stat/stat.log
removeFilepathNotCurrentMonth "$GYLOG_PATH"

echo "begin remove nginxlogpath's files not in current month" >> /u1/hadoop-stat/stat.log
removeFilepathNotCurrentMonth "$NGINXLOG_PATH"

############################
# scp file between hosts
# Pull yesterday's log files from the source hosts into the local
# per-month staging directories, skipping files that are already present.
day=$(date "+%Y-%m-%d" -d "-1 days")
month=$(date "+%Y-%m" -d "-1 days")

gyfilename="gylog-${day}.log"
gyfilepath="${GYLOG_PATH}/${month}"
if [ ! -d "$gyfilepath" ]; then
  # -p so a missing parent staging directory does not make this fail.
  mkdir -p "$gyfilepath"
fi
if [ ! -f "$gyfilepath/$gyfilename" ]; then
  echo "begin scp gylog" >> /u1/hadoop-stat/stat.log
  scp "gy02:/u1/logs/gylog/$gyfilename" "$gyfilepath/"
fi

# NOTE: the "ngxinlog-" spelling is kept as-is; this name is what gets
# uploaded to HDFS later, so correcting it would change stored paths.
nginxfilename="ngxinlog-${day}.log"
nginxfilepath="${NGINXLOG_PATH}/${month}"
if [ ! -d "$nginxfilepath" ]; then
  mkdir -p "$nginxfilepath"
fi
if [ ! -f "$nginxfilepath/$nginxfilename" ]; then
  echo "begin scp nginxlog" >> /u1/hadoop-stat/stat.log
  # The remote file has a fixed name (gy_access.log.1), so it is renamed to
  # the dated name after download; only rename when the copy succeeded.
  scp gy01:/u1/logs/lbnginx/gy_access.log.1 "$nginxfilepath/" \
    && mv "$nginxfilepath/gy_access.log.1" "$nginxfilepath/$nginxfilename"
fi

############################
# copy file to hadoop
# Upload the local log files into HDFS under <base>/<date>/input, creating
# the <date>, input and output directories first when <date> is missing.
GYLOG_HADOOP_PATH="/logs/gylog"
NGINXLOG_HADOOP_PATH="/logs/nginxlog"
# Despite the variable name this is a full date (YYYY-MM-DD), i.e. the HDFS
# layout uses one directory per day.
monthhadoop=$(date "+%Y-%m-%d" -d "-1 days")

gyhadoopfilepath="${GYLOG_HADOOP_PATH}/${monthhadoop}"
gyhadoopfilepathinput="${gyhadoopfilepath}/input"
gyhadoopfilepathoutput="${gyhadoopfilepath}/output"

# "dfs -test -e" exits non-zero when the path does not exist.
if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$gyhadoopfilepath"; then
  echo "begin mkdir gyhadoopfilepath in hadoop because of not exist:$gyhadoopfilepath" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$gyhadoopfilepath"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$gyhadoopfilepathinput"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$gyhadoopfilepathoutput"
fi

# Upload only when the file is not already in HDFS.
if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$gyhadoopfilepathinput/$gyfilename"; then
  echo "begin copy gyhadoopfile to hadoop" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -copyFromLocal "$gyfilepath/$gyfilename" "$gyhadoopfilepathinput/"
fi

nginxhadoopfilepath="${NGINXLOG_HADOOP_PATH}/${monthhadoop}"
nginxhadoopfilepathinput="${nginxhadoopfilepath}/input"
nginxhadoopfilepathoutput="${nginxhadoopfilepath}/output"

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$nginxhadoopfilepath"; then
  echo "begin mkdir nginxhadoopfilepath in hadoop because of not exist:$nginxhadoopfilepath" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$nginxhadoopfilepath"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$nginxhadoopfilepathinput"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$nginxhadoopfilepathoutput"
fi

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$nginxhadoopfilepathinput/$nginxfilename"; then
  echo "begin copy nginxhadoopfile to hadoop" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -copyFromLocal "$nginxfilepath/$nginxfilename" "$nginxhadoopfilepathinput/"
fi

############################
# begin hadoop stat
# Run the MapReduce statistics jobs from stat.jar, passing $day as the only
# argument. The RequestTimeCount and RequestCount jobs are disabled.
#echo "begin hadoop stat RequestTimeCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.requestTime.RequestTimeCount "$day"

#echo "begin hadoop stat RequestCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.request.RequestCount "$day"

echo "begin hadoop stat NginxCount" >> /u1/hadoop-stat/stat.log
/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.nginx.NginxCount "$day"

echo "begin hadoop stat GylogCount" >> /u1/hadoop-stat/stat.log
/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.gylog.GylogCount "$day"

############################
# end for all
# Mark the end of the run, tagged with yesterday's date like the "begin" line.
echo "end [$(date "+%Y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log

Note:

/u1/hadoop-stat/stat.jar gy.log.mr.request.RequestCount

/u1/hadoop-stat/stat.jar gy.log.mr.nginx.NginxCount

/u1/hadoop-stat/stat.jar gy.log.mr.gylog.GylogCount

The MapReduce jobs above implement custom statistics rules; you can develop your own to suit your needs.

The rest of the script mainly uses basic Hadoop commands, so it should be easy to follow if you are familiar with Hadoop.

Subscribe to "Shulou Technology Information" to get the latest news, interesting stories and hot topics in the IT industry, and to keep up with the latest Internet news, technology news and IT industry trends.

Views: 0

*The comments in the above article only represent the author's personal views and do not represent the views and positions of this website. If you have more insights, please feel free to contribute and share.

Share To

Internet Technology

Wechat

© 2024 shulou.com SLNews company. All rights reserved.

12
Report