This tutorial provides step-by-step instructions to set up HDFS on macOS.
Download Apache Hadoop and extract the archive into:
$HOME/hadoop/
$ java -version
Output:
java version "1.8.0_141"
Java(TM) SE Runtime Environment (build 1.8.0_141-b15)
Java HotSpot(TM) 64-Bit Server VM (build 25.141-b15, mixed mode)
## Set Java Home as env variable
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_141.jdk/Contents/Home
## Set HADOOP environment variables
export HADOOP_HOME="$HOME/hadoop/hadoop-3.0.3"
export HADOOP_MAPRED_HOME="$HADOOP_HOME"
export HADOOP_COMMON_HOME="$HADOOP_HOME"
export HADOOP_HDFS_HOME="$HADOOP_HOME"
export HADOOP_YARN_HOME="$HADOOP_HOME"
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_COMMON_LIB_NATIVE_DIR="$HADOOP_HOME/lib/native"
export YARN_HOME="$HADOOP_HOME"
export HADOOP_INSTALL="$HADOOP_HOME"
## The config dir must point at etc/hadoop. (It was previously exported twice,
## the first time incorrectly to $HADOOP_HOME itself.)
export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
export HADOOP_LIBEXEC_DIR="$HADOOP_HOME/libexec"
## ${VAR:+...} appends the old value only when it was already set,
## avoiding a dangling ':' in the path list.
export JAVA_LIBRARY_PATH="$HADOOP_HOME/lib/native${JAVA_LIBRARY_PATH:+:$JAVA_LIBRARY_PATH}"
## Set Path to the Hadoop binaries: bin for client tools (hadoop, hdfs),
## sbin for the start/stop scripts used later in this tutorial.
export PATH="$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin"
Note: The Java home path can be determined by running the command below in Terminal:
/usr/libexec/java_home
<!-- Place below properties inside the configuration tag of core-site.xml -->
<!-- Default file system URI: host and port of the HDFS namenode -->
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://localhost:9000</value>
</property>
<!-- NOTE: dfs.replication was removed from this file; it is an HDFS
     setting and is already configured (with the same value, 1) in
     hdfs-site.xml below, so keeping it here only duplicated it. -->
<!-- Place below properties inside the configuration tag of hdfs-site.xml -->
<!-- Directory where the namenode stores file-system metadata
     (create this hadoop_storage directory first and specify its absolute path) -->
<property>
  <!-- dfs.name.dir is deprecated since Hadoop 2; Hadoop 3 uses dfs.namenode.name.dir -->
  <name>dfs.namenode.name.dir</name>
  <value>[$HOME Path]/hadoop/hadoop_storage</value>
  <final>true</final>
</property>
<!-- Default block size is 128 MB (value is in bytes) -->
<property>
  <name>dfs.blocksize</name>
  <value>134217728</value>
</property>
<!-- Replication factor is 1 because this is a single-node local setup -->
<property>
  <name>dfs.replication</name>
  <value>1</value>
</property>
<!-- Specify a dedicated temporary directory, otherwise Hadoop falls back to
     the system default temp dir, which the OS may clean up -->
<property>
  <name>hadoop.tmp.dir</name>
  <value>[$HOME Path]/hadoop/tmp</value>
</property>
<!-- Below property is for the Hive setup. The URI must match fs.defaultFS;
     it previously pointed at hdfs://master:8020, a host/port that does not
     exist in this single-node localhost:9000 setup. -->
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>hdfs://localhost:9000/user/hive/warehouse</value>
</property>
<!-- Place below properties inside the configuration tag of yarn-site.xml -->
<!-- Enable the auxiliary shuffle service required by MapReduce jobs -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<!-- Environment variables that containers inherit from the NodeManager.
     The list must be comma-separated WITHOUT spaces or newlines: Hadoop's
     Configuration does not trim the entries, so padded names fail to match. -->
<property>
  <name>yarn.nodemanager.env-whitelist</name>
  <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- Report the node as healthy until disk utilization reaches 98.5%.
     (The default threshold of 90% marks a nearly-full laptop disk unhealthy
     and the NodeManager refuses to run containers.)
     The property name must stay on one line: a newline inside <name>
     becomes part of the key and the setting is silently ignored. -->
<property>
  <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
  <value>98.5</value>
</property>
<!-- Place below properties inside the configuration tag of mapred-site.xml -->
<!-- Run MapReduce jobs on YARN. Required in Hadoop 3: without it the
     default "local" runner is used and jobs silently bypass the cluster. -->
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
<!-- Make the MapReduce framework location visible to the ApplicationMaster
     and to the map/reduce task containers -->
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<!-- NOTE: the old mapred.job.tracker property was removed: the JobTracker
     no longer exists under YARN (Hadoop 2+) and the setting is ignored. -->
# --- Edit $HADOOP_HOME/etc/hadoop/hadoop-env.sh: set these two variables ---
# export JAVA_HOME [Same as in .profile file]
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_141.jdk/Contents/Home
# Location of Hadoop.
# NOTE: [Download hadoop Binary path] is a placeholder — replace it with the
# actual extracted Hadoop directory (e.g. $HOME/hadoop/hadoop-3.0.3).
export HADOOP_HOME=[Download hadoop Binary path]
# --- Enable passwordless SSH to localhost (required by the start scripts) ---
# Generates an RSA key with an empty passphrase; skip if ~/.ssh/id_rsa exists.
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# Reload the profile so the exported variables above take effect in this shell
source ~/.profile
# One-time step: format the HDFS namenode.
# WARNING: re-running this erases all existing HDFS metadata.
$HADOOP_HOME/bin/hdfs namenode -format
# Start all daemons at once (deprecated convenience script) ...
$HADOOP_HOME/sbin/start-all.sh
# ... OR start HDFS and YARN separately (preferred; do not run both ways)
$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
$ jps
Output:
2448 SecondaryNameNode
2646 ResourceManager
2311 DataNode
2746 NodeManager
2815 Jps
2207 NameNode
Hadoop Health: http://localhost:9870
Yarn: http://localhost:8088/cluster
# Stop all daemons at once (deprecated convenience script) ...
$HADOOP_HOME/sbin/stop-all.sh
# ... OR stop HDFS and YARN separately (preferred; do not run both ways)
$HADOOP_HOME/sbin/stop-dfs.sh
$HADOOP_HOME/sbin/stop-yarn.sh
# Verify the installation by printing the Hadoop version
$ hadoop version
Output: Hadoop 3.0.3
# List the HDFS root directory
hadoop fs -ls /
# Create your HDFS home directory (replace [username] with your login name)
hadoop fs -mkdir -p /user/[username]
# Copy a local file into the HDFS home directory ("." resolves to /user/<login>)
hadoop fs -copyFromLocal $HADOOP_HOME/etc/hadoop/core-site.xml .
# Upload a local file to an explicit HDFS path
hadoop fs -put $HADOOP_HOME/etc/hadoop/core-site.xml /user/[username]/output
# Read a file back using the full HDFS URI
# (part-r-00000 is the typical name of a MapReduce reducer output file)
hadoop fs -cat hdfs://localhost:9000/user/[username]/output/part-r-00000