まずは、Linuxサーバを1台準備する。
[root@centos /]# /usr/sbin/adduser hadoop
[root@centos /]# passwd hadoop
Changing password for user hadoop.
New UNIX password:
BAD PASSWORD: it is based on a dictionary word
Retype new UNIX password:
passwd: all authentication tokens updated successfully.
[root@centos /]# mkdir -p /opt/hadoop
[root@centos /]# chown hadoop:hadoop /opt/hadoop
[root@centos opt]# wget http://ftp.riken.jp/net/apache/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
--05:47:38-- http://ftp.riken.jp/net/apache/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
ftp.riken.jp をDNSに問いあわせています... 134.160.38.1
ftp.riken.jp|134.160.38.1|:80 に接続しています... 接続しました。
HTTP による接続要求を送信しました、応答を待っています... 404 Not Found
05:47:38 エラー 404: Not Found。
[root@centos opt]# wget http://ftp.kddilabs.jp/infosystems/apache/hadoop/c/stable/hadoop-0.20.2.tar.gz
--06:02:04-- http://ftp.kddilabs.jp/infosystems/apache/hadoop/c/stable/hadoop-0.20.2.tar.gz
ftp.kddilabs.jp をDNSに問いあわせています... 192.26.91.193, 2001:200:601:10:206:5bff:fef0:466c
ftp.kddilabs.jp|192.26.91.193|:80 に接続しています... 接続しました。
HTTP による接続要求を送信しました、応答を待っています... 404 Not Found
06:02:04 エラー 404: Not Found。
[root@centos opt]# wget http://www.meisei-u.ac.jp/mirror/apache/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
--06:04:33-- http://www.meisei-u.ac.jp/mirror/apache/dist/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
www.meisei-u.ac.jp をDNSに問いあわせています... 202.232.192.34
www.meisei-u.ac.jp|202.232.192.34|:80 に接続しています... 接続しました。
HTTP による接続要求を送信しました、応答を待っています... 200 OK
長さ: 44575568 (43M) [application/x-gzip]
Saving to: `hadoop-0.20.2.tar.gz'
100%[=======================================>] 44,575,568 2.63M/s in 19s
06:05:03 (2.19 MB/s) - `hadoop-0.20.2.tar.gz' を保存しました [44575568/44575568]
[root@centos conf]# vi core-site.xml
[root@centos conf]# vi hdfs-site.xml
[root@centos conf]# vi mapred-site.xml
[root@centos conf]# exit
exit
[hadoop@centos conf]$ cd ..
[hadoop@centos hadoop-0.20.2]$ bin/hadoop namenode -format # フォーマット
10/06/02 07:07:11 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG: host = centos.com/172.21.44.141
STARTUP_MSG: args = [-format]
STARTUP_MSG: version = 0.20.2
STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20 -r 911707; compiled by 'chrisdo' on Fri Feb 19 08:07:34 UTC 2010
************************************************************/
10/06/02 07:07:11 INFO namenode.FSNamesystem: fsOwner=hadoop,hadoop
10/06/02 07:07:11 INFO namenode.FSNamesystem: supergroup=supergroup
10/06/02 07:07:11 INFO namenode.FSNamesystem: isPermissionEnabled=true
10/06/02 07:07:11 INFO common.Storage: Image file of size 96 saved in 0 seconds.
10/06/02 07:07:11 INFO common.Storage: Storage directory /tmp/hadoop-hadoop/dfs/name has been successfully formatted.
10/06/02 07:07:11 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at centos.com/172.21.44.141
************************************************************/
[hadoop@centos hadoop-0.20.2]$ cd /tmp
[hadoop@centos tmp]$ ls
gconfd-orca keyring-ChkLJT scim-helper-manager-socket-orca
hadoop-hadoop keyring-JYCGfq scim-panel-socket:0-orca
hsperfdata_hadoop keyring-mmHmy1 scim-socket-frontend-orca
hsperfdata_orca mapping-orca ssh-NKmMdD2893
hsperfdata_root orbit-orca virtual-orca.V6Hxl5
[hadoop@centos tmp]$ cd hadoop-hadoop
[hadoop@centos hadoop-hadoop]$ ls
dfs
[hadoop@centos hadoop-hadoop]$ cd dfs
[hadoop@centos dfs]$ ls
name
[hadoop@centos dfs]$ cd name
[hadoop@centos name]$ ls
current image
$
[hadoop@centos .ssh]$ cat id_dsa.pub >> authorized_keys
[hadoop@centos .ssh]$ chmod 600 authorized_keys
[hadoop@centos .ssh]$ cd ..
[hadoop@centos ~]$ ssh localhost
[hadoop@centos ~]$ cd /opt/hadoop/hadoop-0.20.2
[hadoop@centos hadoop-0.20.2]$ ./bin/start-all.sh # 起動
starting namenode, logging to /opt/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-namenode-centos.com.out
localhost: starting datanode, logging to /opt/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-datanode-centos.com.out
localhost: starting secondarynamenode, logging to /opt/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-secondarynamenode-centos.com.out
starting jobtracker, logging to /opt/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-jobtracker-centos.com.out
localhost: starting tasktracker, logging to /opt/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-tasktracker-centos.com.out
[hadoop@centos hadoop-0.20.2]$ jps # 起動確認
30217 TaskTracker
29925 DataNode
30112 JobTracker
30050 SecondaryNameNode
30273 Jps
29815 NameNode
[hadoop@centos hadoop-0.20.2]$ bin/hadoop jar hadoop-0.20.2-examples.jar pi 1 1000
Number of Maps = 1
Samples per Map = 1000
Wrote input for Map #0
Starting Job
10/06/02 07:56:20 INFO mapred.FileInputFormat: Total input paths to process : 1
10/06/02 07:56:21 INFO mapred.JobClient: Running job: job_201006020729_0001
10/06/02 07:56:22 INFO mapred.JobClient: map 0% reduce 0%
10/06/02 07:56:30 INFO mapred.JobClient: map 100% reduce 0%
10/06/02 07:56:42 INFO mapred.JobClient: map 100% reduce 100%
10/06/02 07:56:44 INFO mapred.JobClient: Job complete: job_201006020729_0001
10/06/02 07:56:44 INFO mapred.JobClient: Counters: 18
10/06/02 07:56:44 INFO mapred.JobClient: Job Counters
10/06/02 07:56:44 INFO mapred.JobClient: Launched reduce tasks=1
10/06/02 07:56:44 INFO mapred.JobClient: Launched map tasks=1
10/06/02 07:56:44 INFO mapred.JobClient: Data-local map tasks=1
10/06/02 07:56:44 INFO mapred.JobClient: FileSystemCounters
10/06/02 07:56:44 INFO mapred.JobClient: FILE_BYTES_READ=28
10/06/02 07:56:44 INFO mapred.JobClient: HDFS_BYTES_READ=118
10/06/02 07:56:44 INFO mapred.JobClient: FILE_BYTES_WRITTEN=88
10/06/02 07:56:44 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=215
10/06/02 07:56:44 INFO mapred.JobClient: Map-Reduce Framework
10/06/02 07:56:44 INFO mapred.JobClient: Reduce input groups=2
10/06/02 07:56:44 INFO mapred.JobClient: Combine output records=0
10/06/02 07:56:44 INFO mapred.JobClient: Map input records=1
10/06/02 07:56:44 INFO mapred.JobClient: Reduce shuffle bytes=28
10/06/02 07:56:44 INFO mapred.JobClient: Reduce output records=0
10/06/02 07:56:44 INFO mapred.JobClient: Spilled Records=4
10/06/02 07:56:44 INFO mapred.JobClient: Map output bytes=18
10/06/02 07:56:44 INFO mapred.JobClient: Map input bytes=24
10/06/02 07:56:44 INFO mapred.JobClient: Combine input records=0
10/06/02 07:56:44 INFO mapred.JobClient: Map output records=2
10/06/02 07:56:44 INFO mapred.JobClient: Reduce input records=2
Job Finished in 23.678 seconds
Estimated value of Pi is 3.14800000000000000000
[hadoop@centos hadoop-0.20.2]$ bin/hadoop jar hadoop-0.20.2-examples.jar pi 10 1000000
Number of Maps = 10
Samples per Map = 1000000
Wrote input for Map #0
Wrote input for Map #1
Wrote input for Map #2
Wrote input for Map #3
Wrote input for Map #4
Wrote input for Map #5
Wrote input for Map #6
Wrote input for Map #7
Wrote input for Map #8
Wrote input for Map #9
Starting Job
10/06/03 01:31:26 INFO mapred.FileInputFormat: Total input paths to process : 10
10/06/03 01:31:26 INFO mapred.JobClient: Running job: job_201006020729_0004
10/06/03 01:31:27 INFO mapred.JobClient: map 0% reduce 0%
10/06/03 01:31:34 INFO mapred.JobClient: map 10% reduce 0%
10/06/03 01:31:36 INFO mapred.JobClient: map 30% reduce 0%
10/06/03 01:31:39 INFO mapred.JobClient: map 40% reduce 0%
10/06/03 01:31:42 INFO mapred.JobClient: map 60% reduce 13%
10/06/03 01:31:48 INFO mapred.JobClient: map 80% reduce 13%
10/06/03 01:31:51 INFO mapred.JobClient: map 80% reduce 20%
10/06/03 01:31:54 INFO mapred.JobClient: map 100% reduce 20%
10/06/03 01:31:57 INFO mapred.JobClient: map 100% reduce 26%
10/06/03 01:32:03 INFO mapred.JobClient: map 100% reduce 100%
10/06/03 01:32:05 INFO mapred.JobClient: Job complete: job_201006020729_0004
10/06/03 01:32:05 INFO mapred.JobClient: Counters: 18
10/06/03 01:32:05 INFO mapred.JobClient: Job Counters
10/06/03 01:32:05 INFO mapred.JobClient: Launched reduce tasks=1
10/06/03 01:32:05 INFO mapred.JobClient: Launched map tasks=10
10/06/03 01:32:05 INFO mapred.JobClient: Data-local map tasks=10
10/06/03 01:32:05 INFO mapred.JobClient: FileSystemCounters
10/06/03 01:32:05 INFO mapred.JobClient: FILE_BYTES_READ=226
10/06/03 01:32:05 INFO mapred.JobClient: HDFS_BYTES_READ=1180
10/06/03 01:32:05 INFO mapred.JobClient: FILE_BYTES_WRITTEN=826
10/06/03 01:32:05 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=215
10/06/03 01:32:05 INFO mapred.JobClient: Map-Reduce Framework
10/06/03 01:32:05 INFO mapred.JobClient: Reduce input groups=20
10/06/03 01:32:05 INFO mapred.JobClient: Combine output records=0
10/06/03 01:32:05 INFO mapred.JobClient: Map input records=10
10/06/03 01:32:05 INFO mapred.JobClient: Reduce shuffle bytes=280
10/06/03 01:32:05 INFO mapred.JobClient: Reduce output records=0
10/06/03 01:32:05 INFO mapred.JobClient: Spilled Records=40
10/06/03 01:32:05 INFO mapred.JobClient: Map output bytes=180
10/06/03 01:32:05 INFO mapred.JobClient: Map input bytes=240
10/06/03 01:32:05 INFO mapred.JobClient: Combine input records=0
10/06/03 01:32:05 INFO mapred.JobClient: Map output records=20
10/06/03 01:32:05 INFO mapred.JobClient: Reduce input records=20
Job Finished in 39.83 seconds
Estimated value of Pi is 3.14158440000000000000 #かなり近い数字になりだした
[hadoop@centos hadoop-0.20.2]$
HDFSを体験、まず領域を確保してデータを取り込む
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -ls / #領域表示
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2010-06-02 07:30 /tmp
drwxr-xr-x - hadoop supergroup 0 2010-06-02 07:56 /user
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -mkdir ./nikkeibp #領域確保
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -ls #領域表示
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2010-06-03 00:49 /user/hadoop/nikkeibp
[hadoop@centos ~]$ wget http://itpro.nikkeibp.co.jp/article/MAG/20100323/346107/itplog.zip
--00:58:24-- http://itpro.nikkeibp.co.jp/article/MAG/20100323/346107/itplog.zip
itpro.nikkeibp.co.jp をDNSに問いあわせています... 202.214.174.88
itpro.nikkeibp.co.jp|202.214.174.88|:80 に接続しています... 接続しました。
HTTP による接続要求を送信しました、応答を待っています... 200 OK
長さ: 13873458 (13M) [application/zip]
Saving to: `itplog.zip'
100%[=======================================>] 13,873,458 264K/s in 51s
00:59:26 (265 KB/s) - `itplog.zip' を保存しました [13873458/13873458]
[hadoop@centos ~]$ cd /opt/hadoop/hadoop-0.20.2
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -copyFromlocal /home/hadoop/log_finish.txt /nikkeibp
copyFromlocal: Unknown command
Usage: java FsShell
	[-ls <path>]
	[-lsr <path>]
	[-du <path>]
	[-dus <path>]
	[-count[-q] <path>]
	[-mv <src> <dst>]
	[-cp <src> <dst>]
	[-rm [-skipTrash] <path>]
	[-rmr [-skipTrash] <path>]
	[-expunge]
	[-put <localsrc> ... <dst>]
	[-copyFromLocal <localsrc> ... <dst>]
	[-moveFromLocal <localsrc> ... <dst>]
	[-get [-ignoreCrc] [-crc] <src> <localdst>]
	[-getmerge <src> <localdst> [addnl]]
	[-cat <src>]
	[-text <src>]
	[-copyToLocal [-ignoreCrc] [-crc] <src> <localdst>]
	[-moveToLocal [-crc] <src> <localdst>]
	[-mkdir <path>]
	[-setrep [-R] [-w] <rep> <path/file>]
	[-touchz <path>]
	[-test -[ezd] <path>]
	[-stat [format] <path>]
	[-tail [-f] <file>]
	[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
	[-chown [-R] [OWNER][:[GROUP]] PATH...]
	[-chgrp [-R] GROUP PATH...]
	[-help [cmd]]
Generic options supported are
-conf <configuration file>     specify an application configuration file
-D <property=value>            use value for given property
-fs <local|namenode:port>      specify a namenode
-jt <local|jobtracker:port>    specify a job tracker
-files <comma separated list of files>    specify comma separated files to be copied to the map reduce cluster
-libjars <comma separated list of jars>    specify comma separated jar files to include in the classpath.
-archives <comma separated list of archives>    specify comma separated archives to be unarchived on the compute machines.
The general command line syntax is #スペルが違っていたようです
bin/hadoop command [genericOptions] [commandOptions]
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -copyFromLocal /home/hadoop/log_finish.txt ./nikkeibp #HDFS側へ複写
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -ls ./nikkeibp
Found 1 items
-rw-r--r-- 1 hadoop supergroup 157164610 2010-06-03 01:08 /user/hadoop/nikkeibp/log_finish.txt
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -cat ./nikkeibp/log_finish.txt | head
110.0.146.55 - - [12/Jan/2010:06:00:10 +0900] "GET /linux/ HTTP/1.0" 302 1102
120.0.137.1 - - [12/Jan/2010:06:00:39 +0900] "GET /article/COLUMN/20100104/342856/Linux.jpg HTTP/1.0" 200 70635
110.1.16.142 - - [12/Jan/2010:06:00:48 +0900] "GET /article/COLUMN/20060227/230772/ HTTP/1.1" 200 25265
110.1.16.142 - - [12/Jan/2010:06:01:06 +0900] "GET /article/COLUMN/20060228/230982/ HTTP/1.1" 200 27311
110.1.16.142 - - [12/Jan/2010:06:01:21 +0900] "GET /article/COLUMN/20060224/230556/ HTTP/1.1" 200 29319
110.1.16.142 - - [12/Jan/2010:06:01:22 +0900] "GET /article/COLUMN/20060227/230745/ HTTP/1.1" 200 26691
110.0.183.64 - - [12/Jan/2010:06:01:24 +0900] "GET /article/COLUMN/20091225/342588/ HTTP/1.0" 200 24791
110.1.16.142 - - [12/Jan/2010:06:03:05 +0900] "GET /article/COLUMN/20060228/231157/ HTTP/1.1" 200 25564
110.1.16.142 - - [12/Jan/2010:06:03:10 +0900] "GET /article/COLUMN/20060224/230567/ HTTP/1.1" 200 28584
110.1.16.142 - - [12/Jan/2010:06:03:59 +0900] "GET /article/COLUMN/20060227/230715/ HTTP/1.1" 200 26146
cat: Unable to write to output stream.
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -cp ./nikkeibp/log_finish.txt ./nikkeibp/log_finish_copy.txt #複写
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -ls ./nikkeibp
Found 2 items
-rw-r--r-- 1 hadoop supergroup 157164610 2010-06-03 01:08 /user/hadoop/nikkeibp/log_finish.txt
-rw-r--r-- 1 hadoop supergroup 157164610 2010-06-03 01:21 /user/hadoop/nikkeibp/log_finish_copy.txt
150万件のデータ処理を行ってみましょう
[hadoop@centos hadoop-0.20.2]$ bin/hadoop jar hadoop-0.20.2-examples.jar grep ./nikkeibp/log_finish.txt ./nikkeibp/url3_freq "GET (\\S+)" 1
10/06/03 02:22:55 INFO mapred.FileInputFormat: Total input paths to process : 1
10/06/03 02:22:55 INFO mapred.JobClient: Running job: job_201006020729_0012
10/06/03 02:22:56 INFO mapred.JobClient: map 0% reduce 0%
10/06/03 02:23:10 INFO mapred.JobClient: map 28% reduce 0%
10/06/03 02:23:13 INFO mapred.JobClient: map 36% reduce 0%
10/06/03 02:23:16 INFO mapred.JobClient: map 54% reduce 0%
10/06/03 02:23:19 INFO mapred.JobClient: map 61% reduce 0%
10/06/03 02:23:22 INFO mapred.JobClient: map 66% reduce 0%
10/06/03 02:23:25 INFO mapred.JobClient: map 100% reduce 0%
10/06/03 02:23:34 INFO mapred.JobClient: map 100% reduce 100%
10/06/03 02:23:36 INFO mapred.JobClient: Job complete: job_201006020729_0012
10/06/03 02:23:36 INFO mapred.JobClient: Counters: 18
10/06/03 02:23:36 INFO mapred.JobClient: Job Counters
10/06/03 02:23:36 INFO mapred.JobClient: Launched reduce tasks=1
10/06/03 02:23:36 INFO mapred.JobClient: Launched map tasks=3
10/06/03 02:23:36 INFO mapred.JobClient: Data-local map tasks=3
10/06/03 02:23:36 INFO mapred.JobClient: FileSystemCounters
10/06/03 02:23:36 INFO mapred.JobClient: FILE_BYTES_READ=3424755
10/06/03 02:23:36 INFO mapred.JobClient: HDFS_BYTES_READ=157172804
10/06/03 02:23:36 INFO mapred.JobClient: FILE_BYTES_WRITTEN=4960482
10/06/03 02:23:36 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=872794
10/06/03 02:23:36 INFO mapred.JobClient: Map-Reduce Framework
10/06/03 02:23:36 INFO mapred.JobClient: Reduce input groups=12833
10/06/03 02:23:36 INFO mapred.JobClient: Combine output records=59909
10/06/03 02:23:36 INFO mapred.JobClient: Map input records=1455557
10/06/03 02:23:36 INFO mapred.JobClient: Reduce shuffle bytes=1535631
10/06/03 02:23:36 INFO mapred.JobClient: Reduce output records=12833
10/06/03 02:23:36 INFO mapred.JobClient: Spilled Records=86445
10/06/03 02:23:36 INFO mapred.JobClient: Map output bytes=66345140
10/06/03 02:23:36 INFO mapred.JobClient: Map input bytes=157164610
10/06/03 02:23:36 INFO mapred.JobClient: Combine input records=1488930
10/06/03 02:23:36 INFO mapred.JobClient: Map output records=1455557
10/06/03 02:23:36 INFO mapred.JobClient: Reduce input records=26536
10/06/03 02:23:36 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
10/06/03 02:23:36 INFO mapred.FileInputFormat: Total input paths to process : 1
10/06/03 02:23:36 INFO mapred.JobClient: Running job: job_201006020729_0013
10/06/03 02:23:37 INFO mapred.JobClient: map 0% reduce 0%
10/06/03 02:23:46 INFO mapred.JobClient: map 50% reduce 0%
10/06/03 02:23:49 INFO mapred.JobClient: map 100% reduce 0%
10/06/03 02:23:58 INFO mapred.JobClient: map 100% reduce 100%
10/06/03 02:24:00 INFO mapred.JobClient: Job complete: job_201006020729_0013
10/06/03 02:24:00 INFO mapred.JobClient: Counters: 18
10/06/03 02:24:00 INFO mapred.JobClient: Job Counters
10/06/03 02:24:00 INFO mapred.JobClient: Launched reduce tasks=1
10/06/03 02:24:00 INFO mapred.JobClient: Launched map tasks=2
10/06/03 02:24:00 INFO mapred.JobClient: Data-local map tasks=2
10/06/03 02:24:00 INFO mapred.JobClient: FileSystemCounters
10/06/03 02:24:00 INFO mapred.JobClient: FILE_BYTES_READ=787719
10/06/03 02:24:00 INFO mapred.JobClient: HDFS_BYTES_READ=873358
10/06/03 02:24:00 INFO mapred.JobClient: FILE_BYTES_WRITTEN=1575508
10/06/03 02:24:00 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=688832
10/06/03 02:24:00 INFO mapred.JobClient: Map-Reduce Framework
10/06/03 02:24:00 INFO mapred.JobClient: Reduce input groups=790
10/06/03 02:24:00 INFO mapred.JobClient: Combine output records=0
10/06/03 02:24:00 INFO mapred.JobClient: Map input records=12833
10/06/03 02:24:00 INFO mapred.JobClient: Reduce shuffle bytes=787725
10/06/03 02:24:00 INFO mapred.JobClient: Reduce output records=12833
10/06/03 02:24:00 INFO mapred.JobClient: Spilled Records=25666
10/06/03 02:24:00 INFO mapred.JobClient: Map output bytes=761584
10/06/03 02:24:00 INFO mapred.JobClient: Map input bytes=872708
10/06/03 02:24:00 INFO mapred.JobClient: Combine input records=0
10/06/03 02:24:00 INFO mapred.JobClient: Map output records=12833
10/06/03 02:24:00 INFO mapred.JobClient: Reduce input records=12833
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -ls ./nikkeibp/url3_freq
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2010-06-03 02:23 /user/hadoop/nikkeibp/url3_freq/_logs
-rw-r--r-- 1 hadoop supergroup 688832 2010-06-03 02:23 /user/hadoop/nikkeibp/url3_freq/part-00000
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -cat ./nikkeibp/url3_freq/part-00000|more #内容の確認
32164 /article/COLUMN/20060224/230573/
25124 /linux/index.html
18579 /linux/
12152 /linux/image/cover_small_1002.jpg
12104 /linux/image2006/object_square_01.gif
12089 /linux/image2006/title_info_01.gif
12082 /linux/image2006/title_linux_01.gif
12054 /linux/image/mook200906_w75.jpg
12052 /linux/image2006/object_new_01.gif
12050 /linux/image2006/btn_01.jpg
12044 /linux/image/mook200910_w75.jpg
12039 /linux/image2006/btn_02.jpg
12038 /linux/image2006/btn_04.jpg
12028 /linux/image2006/btn_07.jpg
12025 /linux/image2006/btn_05.jpg
12011 /linux/image2006/btn_03.jpg
11993 /linux/image/furoku_icon.jpg
11991 /linux/image2006/btn_08.jpg
11991 /linux/image2006/btn_09.jpg
11989 /linux/image2006/btn_10.jpg
11976 /linux/image2006/btn_12.jpg
11972 /linux/image2006/btn_15.jpg
11971 /linux/image2006/btn_06.jpg
[root@centos opt]#
[hadoop@centos hadoop-0.20.2]$ bin/hadoop jar hadoop-0.20.2-examples.jar sudoku src/examples/org/apache/hadoop/examples/dancing/puzzle1.dta
Solving src/examples/org/apache/hadoop/examples/dancing/puzzle1.dta
8 5 1 3 9 2 6 4 7
4 3 2 6 7 8 1 9 5
7 9 6 5 1 4 3 8 2
6 1 4 8 2 3 7 5 9
5 7 8 9 6 1 4 2 3
3 2 9 4 5 7 8 1 6
9 4 7 2 8 6 5 3 1
1 8 5 7 3 9 2 6 4
2 6 3 1 4 5 9 7 8
Found 1 solutions
[hadoop@centos hadoop-0.20.2]$
Ruby で Hadoopを
[hadoop@centos hadoop-0.20.2]$ bin/hadoop jar contrib/streaming/hadoop-0.20.2-streaming.jar -input ./nikkeibp/data -output ./nikkeibp/out -mapper "/opt/ruby/bin/ruby /opt/hadoop/hadoop-0.20.2/mapper.rb" -reducer "/opt/ruby/bin/ruby /opt/hadoop/hadoop-0.20.2/reducer.rb"
packageJobJar: [/tmp/hadoop-hadoop/hadoop-unjar26206/] [] /tmp/streamjob26207.jar tmpDir=null
10/06/07 15:44:10 INFO mapred.FileInputFormat: Total input paths to process : 1
10/06/07 15:44:10 INFO streaming.StreamJob: getLocalDirs(): [/tmp/hadoop-hadoop/mapred/local]
10/06/07 15:44:10 INFO streaming.StreamJob: Running job: job_201006070131_0006
10/06/07 15:44:10 INFO streaming.StreamJob: To kill this job, run:
10/06/07 15:44:10 INFO streaming.StreamJob: /opt/hadoop/hadoop-0.20.2/bin/../bin/hadoop job -Dmapred.job.tracker=localhost:54311 -kill job_201006070131_0006
10/06/07 15:44:10 INFO streaming.StreamJob: Tracking URL: http://localhost.localdomain:50030/jobdetails.jsp?jobid=job_201006070131_0006
10/06/07 15:44:11 INFO streaming.StreamJob: map 0% reduce 0%
10/06/07 15:44:17 INFO streaming.StreamJob: map 100% reduce 0%
10/06/07 15:44:29 INFO streaming.StreamJob: map 100% reduce 100%
10/06/07 15:44:32 INFO streaming.StreamJob: Job complete: job_201006070131_0006
10/06/07 15:44:32 INFO streaming.StreamJob: Output: ./nikkeibp/out
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -ls ./nikkeibp/out
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2010-06-07 15:44 /user/hadoop/nikkeibp/out/_logs
-rw-r--r-- 1 hadoop supergroup 48 2010-06-07 15:44 /user/hadoop/nikkeibp/out/part-00000
[hadoop@centos hadoop-0.20.2]$ ./bin/hadoop fs -cat ./nikkeibp/out/part-00000
aaa 2
bbb 2
ccc 2
ddd 2
eee 1
fff 1
ggg 1
hhh 1
[hadoop@centos hadoop-0.20.2]$
|