Hadoop installation guide: https://codewitharjun.medium.com/install-hadoop-on-ubuntu-operating-system-6e0ca4ef9689
sudo apt update
sudo apt install openjdk-11-jdk
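Quick check that the JDK installed correctly (both should report version 11):
java -version
javac -version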
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
export PATH=$PATH:/usr/lib/jvm/java-11-openjdk-amd64/bin
export HADOOP_HOME=~/hadoop-3.2.3/
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_STREAMING=$HADOOP_HOME/share/hadoop/tools/lib/hadoop-streaming-3.2.3.jar
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
export PDSH_RCMD_TYPE=ssh
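These export lines only apply to the current shell session. To make them permanent, append them to ~/.bashrc and reload it (assuming Bash is the login shell):
nano ~/.bashrc        (paste the export lines above at the end)
source ~/.bashrc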
sudo apt-get install ssh
tar -zxvf ~/Downloads/hadoop-3.2.3.tar.gz
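This assumes the archive is extracted into the home directory, so the resulting folder matches HADOOP_HOME=~/hadoop-3.2.3 above. Verify that the hadoop binary is found on PATH:
hadoop version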
cd hadoop-3.2.3/etc/hadoop
sudo nano hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
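(this must match the JDK installed earlier, here OpenJDK 11) If unsure of the exact JDK path on your machine, it can be derived from the java binary itself; stripping the trailing /bin/java from this output gives JAVA_HOME:
readlink -f $(which java)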
sudo nano core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.proxyuser.dataflair.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.dataflair.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.server.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.server.groups</name>
    <value>*</value>
  </property>
</configuration>
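Sanity-check that Hadoop picks up the new setting (hdfs getconf reads the XML directly, no daemons needed):
hdfs getconf -confKey fs.defaultFS
(should print hdfs://localhost:9000)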
sudo nano hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
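dfs.replication is set to 1 because a single-node cluster has only one DataNode to hold block copies. Quick check:
hdfs getconf -confKey dfs.replication
(should print 1)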
sudo nano mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  </property>
</configuration>
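To confirm that the MapReduce jars referenced by mapreduce.application.classpath actually resolve, list the effective classpath:
hadoop classpath | tr ':' '\n' | grep mapreduce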
sudo nano yarn-site.xml
----- masternode
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>masternode</value>
  </property>
</configuration>
----- datanode
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>masternode</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
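The masternode hostname must resolve on every node. On a test setup this can be done with an /etc/hosts entry; the IP below is only a placeholder, substitute the master's real address:
echo "192.168.1.10 masternode" | sudo tee -a /etc/hosts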
ssh localhost
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
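SSH should now log in without asking for a password; test it again and leave the session:
ssh localhost
exit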
hadoop-3.2.3/bin/hdfs namenode -format
start-all.sh
(starts the HDFS daemons: NameNode, DataNode, SecondaryNameNode; and the YARN daemons: ResourceManager, NodeManager)
NameNode web UI: http://localhost:9870
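jps (a JDK tool) lists the running Java processes; after start-all.sh it should show NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager:
jps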
hadoop fs -mkdir /user
hadoop fs -mkdir /user/thuan/
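Confirm the home directory was created:
hadoop fs -ls /user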
Create a 1 GB test file locally (dd writes 1024 one-megabyte blocks of zeros) and upload it to HDFS while the daemons are still running:
dd if=/dev/zero of=/home/thuan/Downloads/virtual_file1.img bs=1M count=1024
hdfs dfs -put /home/thuan/Downloads/virtual_file1.img /user/thuan/
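Verify the upload and inspect how HDFS split the file; with the default 128 MB block size the 1 GB file should occupy 8 blocks:
hdfs dfs -ls /user/thuan/
hdfs fsck /user/thuan/virtual_file1.img -files -blocks
As a last smoke test, the hadoop-streaming jar exported as HADOOP_STREAMING can run a trivial MapReduce job. A minimal sketch, assuming the daemons are still up (sample.txt and streaming-out are illustrative names; /bin/cat and /usr/bin/wc follow the basic example from the streaming documentation):
echo "hello hadoop hello hdfs" > sample.txt
hadoop fs -put sample.txt /user/thuan/
hadoop jar $HADOOP_STREAMING -input /user/thuan/sample.txt -output /user/thuan/streaming-out -mapper /bin/cat -reducer /usr/bin/wc
hadoop fs -cat /user/thuan/streaming-out/part-*
When finished, shut the daemons down:
stop-all.sh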