Running Hadoop on version 3.0 of the HPC cluster now requires MyHadoop.
The following example creates a four-node Hadoop cluster and runs a sample MapReduce program (word count).
#!/bin/bash
################################################################################
#  slurm.sbatch - A sample submit script for SLURM that illustrates how to
#  spin up a Hadoop cluster for a map/reduce task using myHadoop
#
# Created:
#  Glenn K. Lockwood, San Diego Supercomputer Center    February 2014
# Revised:
#  Tingyang Xu                                          September 2015
#  Pariksheet Nanda                                     December 2015
#  Rhian Resnick (FAU)                                  January 2021
################################################################################
#SBATCH -p shortq7

## -N 4 allocates 4 nodes to run the Hadoop cluster (1 master namenode, 3 datanodes)
#SBATCH -N 4

# Use 20 cores on each node
#SBATCH -c 20

#SBATCH -C ivybridge  # Limit to old compute nodes

#################NO CHANGE############################
## --ntasks-per-node=1 so that each node runs a single datanode/namenode.
#SBATCH --ntasks-per-node=1
#SBATCH --exclusive
#################NO CHANGE END########################

# Load the modules used at FAU for Hadoop
module load hadoop/2.7.7 myhadoop/v0.30 jdk-1.8.0_212-b10-gcc-8.3.0-q4xbe42

# Download example data (Moby Dick from Project Gutenberg) for the mapreduce script.
if [ ! -f ./pg2701.txt ]; then
    echo "*** Retrieving some sample input data"
    wget 'http://www.gutenberg.org/cache/epub/2701/pg2701.txt'
fi

# Set the storage directory for temporary Hadoop configuration files.
# It must be in a location accessible to all the compute nodes.
export HADOOP_CONF_DIR=$PWD/hadoop-conf.$SLURM_JOBID

#################NO CHANGE############################
if [ "z$HADOOP_OLD_DIR" == "z" ]; then
    myhadoop-configure.sh -s `pwd`/scratch
else
    myhadoop-configure.sh -p $HADOOP_OLD_DIR -s `pwd`/scratch
fi

# Test whether HADOOP_CONF_DIR is accessible by the compute nodes.
if ! srun ls -d $HADOOP_CONF_DIR; then
    echo "The configuration files are not accessible by the compute nodes. Please put your HADOOP_CONF_DIR in a shared location such as your home or scratch directory. For example: export HADOOP_CONF_DIR=/scratch/${USER}_hadoop-conf.$SLURM_JOBID"
    myhadoop-cleanup.sh
    rm -rf $HADOOP_CONF_DIR
    exit 1
fi

start-all.sh
#################NO CHANGE END########################

# Make the data directory using Hadoop's hdfs and load the input file
hdfs dfs -mkdir /data
hdfs dfs -put ./pg2701.txt /data
hdfs dfs -ls /data

# Run the word counting example
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount \
    /data /wordcount-output

# Copy the results out of HDFS into the submit directory
hdfs dfs -ls /wordcount-output
hdfs dfs -get /wordcount-output ./

#################NO CHANGE############################
stop-all.sh
myhadoop-cleanup.sh
rm -rf $HADOOP_CONF_DIR
#################NO CHANGE END########################
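To run the example, save the script as slurm.sbatch in a directory that the compute nodes can reach (for example, your scratch directory) and submit it with sbatch. A minimal session might look like the sketch below; the output filename assumes the standard MapReduce reducer naming (part-r-00000), and the job ID shown by squeue will differ on your system:

# Submit the batch script to SLURM
sbatch slurm.sbatch

# Watch the job's state (PD = pending, R = running)
squeue -u $USER

# After the job completes, the results are copied back to ./wordcount-output.
# Each line is a word followed by its count in pg2701.txt.
head wordcount-output/part-r-00000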