diff --git a/Hadoop-Compatible-File-System.md b/Hadoop-Compatible-File-System.md
index 1b07638..b46e528 100644
--- a/Hadoop-Compatible-File-System.md
+++ b/Hadoop-Compatible-File-System.md
@@ -6,28 +6,46 @@ SeaweedFS excels on small files and has no issue storing large files. Now it is
```
$ cd $GOPATH/src/github.com/chrislusf/seaweedfs/other/java/client
$ mvn install
-$ cd $GOPATH/src/github.com/chrislusf/seaweedfs/other/java/hdfs
+
+# build for hadoop2
+$ cd $GOPATH/src/github.com/chrislusf/seaweedfs/other/java/hdfs2
$ mvn package
-$ ls -al target/seaweedfs-hadoop-client-*.jar
+$ ls -al target/seaweedfs-hadoop2-client-*.jar
+
+# build for hadoop3
+$ cd $GOPATH/src/github.com/chrislusf/seaweedfs/other/java/hdfs3
+$ mvn package
+$ ls -al target/seaweedfs-hadoop3-client-*.jar
```

Or you can download the latest version from Maven Central:
-* https://mvnrepository.com/artifact/com.github.chrislusf/seaweedfs-hadoop-client
-* http://central.maven.org/maven2/com/github/chrislusf/seaweedfs-hadoop-client/
+* https://mvnrepository.com/artifact/com.github.chrislusf/seaweedfs-hadoop2-client
+* http://central.maven.org/maven2/com/github/chrislusf/seaweedfs-hadoop2-client/
+
+* https://mvnrepository.com/artifact/com.github.chrislusf/seaweedfs-hadoop3-client
+* http://central.maven.org/maven2/com/github/chrislusf/seaweedfs-hadoop3-client/

# Test SeaweedFS on Hadoop

Suppose you are starting with a fresh Hadoop installation. Here are the minimum steps to get SeaweedFS to run.

-You would need to start a weed filer first, build the seaweedfs-hadoop-client-x.x.x.jar, and do the following:
+You would need to start a weed filer first, build the seaweedfs-hadoop2-client-x.x.x.jar
+or seaweedfs-hadoop3-client-x.x.x.jar, and do the following:

```
$ cd ${HADOOP_HOME}
# create etc/hadoop/mapred-site.xml, just to satisfy hdfs dfs. Skip this if the file already exists.
$ echo "" > etc/hadoop/mapred-site.xml
+
+# on hadoop2
$ bin/hdfs dfs -Dfs.defaultFS=seaweedfs://localhost:8888 \
    -Dfs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
-    -libjars ./seaweedfs-hadoop-client-x.x.x.jar \
+    -libjars ./seaweedfs-hadoop2-client-x.x.x.jar \
+    -ls /
+# or on hadoop3
+$ bin/hdfs dfs -Dfs.defaultFS=seaweedfs://localhost:8888 \
+    -Dfs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
+    -libjars ./seaweedfs-hadoop3-client-x.x.x.jar \
    -ls /
```

@@ -57,7 +75,10 @@ $ bin/hadoop classpath

# Copy the SeaweedFS HDFS client jar to one of the folders
$ cd ${HADOOP_HOME}
-$ cp ./seaweedfs-hadoop-client-x.x.x.jar share/hadoop/common/lib/
+# for hadoop2
+$ cp ./seaweedfs-hadoop2-client-x.x.x.jar share/hadoop/common/lib/
+# or for hadoop3
+$ cp ./seaweedfs-hadoop3-client-x.x.x.jar share/hadoop/common/lib/
```

Now you can do this:

@@ -83,12 +104,12 @@ To make these files visible to Spark, set HADOOP_CONF_DIR in $SPARK_HOME/conf/sp

## installation not inheriting from Hadoop cluster configuration

-Copy the seaweedfs-hadoop-client-x.x.x.jar to all executor machines.
+Copy the seaweedfs-hadoop2-client-x.x.x.jar to all executor machines.

Add the following to spark/conf/spark-defaults.conf on every node running Spark:
```
-spark.driver.extraClassPath /path/to/seaweedfs-hadoop-client-x.x.x.jar
-spark.executor.extraClassPath /path/to/seaweedfs-hadoop-client-x.x.x.jar
+spark.driver.extraClassPath /path/to/seaweedfs-hadoop2-client-x.x.x.jar
+spark.executor.extraClassPath /path/to/seaweedfs-hadoop2-client-x.x.x.jar
```

And modify the configuration at runtime:
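+
+A minimal sketch of passing the same settings to a single job with spark-submit (the filer address, the jar path, and my-app.jar are placeholders for your own setup; Spark forwards any spark.hadoop.* property to the underlying Hadoop configuration):
+```
+$ bin/spark-submit \
+    --conf spark.driver.extraClassPath=/path/to/seaweedfs-hadoop2-client-x.x.x.jar \
+    --conf spark.executor.extraClassPath=/path/to/seaweedfs-hadoop2-client-x.x.x.jar \
+    --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \
+    --conf spark.hadoop.fs.defaultFS=seaweedfs://localhost:8888 \
+    my-app.jar
+```
+
+With the client jar on both classpaths and the two fs.* properties set, a seaweedfs:// URI can be used wherever an hdfs:// path would appear, e.g. spark.read.textFile("seaweedfs://localhost:8888/some/dir").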