This tutorial provides step-by-step instructions for setting up a single-node HDFS (Hadoop 3.0.3) installation on macOS.
Download the Apache Hadoop binary tarball and extract it to $HOME/hadoop/.
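A minimal download-and-extract sketch, assuming the standard Apache archive URL layout and Hadoop 3.0.3 (verify the link and version before running):

# Download the Hadoop 3.0.3 binary tarball (URL assumes the Apache archive layout)
curl -O https://archive.apache.org/dist/hadoop/common/hadoop-3.0.3/hadoop-3.0.3.tar.gz
# Extract under $HOME/hadoop/
mkdir -p $HOME/hadoop
tar -xzf hadoop-3.0.3.tar.gz -C $HOME/hadoop/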
$ java -version
Output:
java version "1.8.0_141"
Java(TM) SE Runtime Environment (build 1.8.0_141-b15)
Java HotSpot(TM) 64-Bit Server VM (build 25.141-b15, mixed mode)
Add the following environment variables to your ~/.profile (this setup assumes Hadoop 3.0.3 extracted under $HOME/hadoop/):

## Set Java Home as env variable
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_141.jdk/Contents/Home
## Set HADOOP environment variables
export HADOOP_HOME=$HOME/hadoop/hadoop-3.0.3
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
## Set Path to the Hadoop Binary
export PATH=$PATH:$HADOOP_HOME/bin

Note: the path of Java Home can be determined by running the command below in Terminal:
/usr/libexec/java_home
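If you prefer not to hard-code the JDK path in ~/.profile, JAVA_HOME can be resolved dynamically with the same utility; a minimal sketch:

## Resolve JAVA_HOME dynamically instead of hard-coding the JDK path
export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)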
Edit $HADOOP_HOME/etc/hadoop/core-site.xml:

<!-- Place the below properties inside the configuration tag -->
<!-- Specify the port for hadoop hdfs -->
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://localhost:9000</value>
</property>
<property>
  <name>dfs.replication</name>
  <value>1</value>
</property>
Edit $HADOOP_HOME/etc/hadoop/hdfs-site.xml:

<!-- Place the below properties inside the configuration tag -->
<!-- Directory path where the hadoop file system will be created
     (create this hadoop_storage directory first and specify its path).
     dfs.name.dir is the legacy name of dfs.namenode.name.dir in Hadoop 3. -->
<property>
  <name>dfs.name.dir</name>
  <value>[$HOME Path]/hadoop/hadoop_storage</value>
  <final>true</final>
</property>
<!-- Default block size is 128 MB -->
<property>
  <name>dfs.blocksize</name>
  <value>134217728</value>
</property>
<!-- Replication factor is set to 1 since this is a local single-node setup -->
<property>
  <name>dfs.replication</name>
  <value>1</value>
</property>
<!-- Specify a dedicated temporary directory, otherwise Hadoop will use the system default temp dir -->
<property>
  <name>hadoop.tmp.dir</name>
  <value>[$HOME Path]/hadoop/tmp</value>
</property>
<!-- Below property is for the hive setup -->
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>hdfs://master:8020/user/hive/warehouse</value>
</property>
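The config above assumes the storage and temp directories already exist; they can be created up front (paths mirror the [$HOME Path] placeholders in hdfs-site.xml):

# Create the directories referenced in hdfs-site.xml
mkdir -p $HOME/hadoop/hadoop_storage
mkdir -p $HOME/hadoop/tmp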
Edit $HADOOP_HOME/etc/hadoop/yarn-site.xml:

<!-- Place the below properties inside the configuration tag -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.env-whitelist</name>
  <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- Report the node as healthy until disk utilization reaches 98.5% -->
<property>
  <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
  <value>98.5</value>
</property>
Edit $HADOOP_HOME/etc/hadoop/mapred-site.xml:

<!-- Place the below properties inside the configuration tag -->
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<!-- Legacy (pre-YARN) property kept from the original setup -->
<property>
  <name>mapred.job.tracker</name>
  <value>localhost:8021</value>
</property>
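If MapReduce jobs later run in local mode instead of on YARN, the framework name may also need to be set explicitly in mapred-site.xml; this property is not part of the original setup but is standard in Hadoop 3:

<!-- Run MapReduce jobs on YARN rather than in local mode (assumed addition) -->
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>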
Edit $HADOOP_HOME/etc/hadoop/hadoop-env.sh:

# export JAVA_HOME [same as in the .profile file]
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_141.jdk/Contents/Home
# Location of Hadoop
export HADOOP_HOME=[Download hadoop Binary path]
Set up passwordless SSH to localhost:

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
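On macOS, Remote Login must be enabled (System Preferences > Sharing > Remote Login) before sshd accepts connections. Passwordless SSH can then be verified with:

$ ssh localhost

It should log in without prompting for a password.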
Reload the profile and format the NameNode (run the format only once; re-formatting wipes HDFS metadata):

source ~/.profile
$HADOOP_HOME/bin/hdfs namenode -format
Start all the daemons:

$HADOOP_HOME/sbin/start-all.sh

Or start HDFS and YARN separately:

$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
Verify that all daemons are running:

$ jps
Output:
2448 SecondaryNameNode
2646 ResourceManager
2311 DataNode
2746 NodeManager
2815 Jps
2207 NameNode
Hadoop Health: http://localhost:9870
Yarn: http://localhost:8088/cluster
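Cluster and datanode status can also be checked from the command line:

$ hdfs dfsadmin -report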
Stop all the daemons:

$HADOOP_HOME/sbin/stop-all.sh

Or stop HDFS and YARN separately:

$HADOOP_HOME/sbin/stop-dfs.sh
$HADOOP_HOME/sbin/stop-yarn.sh
$ hadoop version
Output:
Hadoop 3.0.3
List the contents of the HDFS root directory:

hadoop fs -ls /
Create a home directory for your user in HDFS:

hadoop fs -mkdir -p /user/[username]
Copy a local file into HDFS (the two commands below are equivalent ways to upload):

hadoop fs -copyFromLocal $HADOOP_HOME/etc/hadoop/core-site.xml .
hadoop fs -put $HADOOP_HOME/etc/hadoop/core-site.xml /user/[username]/output
Read a file from HDFS using the full URI (part-r-00000 is the typical name of a reducer output file):

hadoop fs -cat hdfs://localhost:9000/user/[username]/output/part-r-00000
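As a final smoke test, one of the MapReduce example jobs bundled with Hadoop can be run end-to-end. A minimal sketch: the jar name assumes Hadoop 3.0.3, and the input/wcout paths are illustrative:

# Stage some input in HDFS (paths are illustrative)
hadoop fs -mkdir -p /user/[username]/input
hadoop fs -put $HADOOP_HOME/etc/hadoop/core-site.xml /user/[username]/input
# Run the bundled wordcount example (jar name assumes Hadoop 3.0.3)
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.3.jar \
  wordcount /user/[username]/input /user/[username]/wcout
# Inspect the reducer output
hadoop fs -cat /user/[username]/wcout/part-r-00000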