From 639a1d64eed1a90f58ba52c151ff61af8f219b56 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 18 Oct 2020 14:16:18 -0700 Subject: [PATCH] Updated run Spark on SeaweedFS (markdown) --- run-Spark-on-SeaweedFS.md | 60 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/run-Spark-on-SeaweedFS.md b/run-Spark-on-SeaweedFS.md index 89de187..0702a5b 100644 --- a/run-Spark-on-SeaweedFS.md +++ b/run-Spark-on-SeaweedFS.md @@ -56,4 +56,62 @@ $ bin/spark-submit --name spark-pi \ --conf spark.eventLog.dir=seaweedfs://192.168.2.3:8888/spark2-history/ \ file:///usr/local/spark/examples/jars/spark-examples_2.12-3.0.0.jar -``` \ No newline at end of file +``` + + +# My Example +Here is my local setup, with everything switched to SeaweedFS. + 1. This is my local `spark-defaults.conf`: +``` +spark.eventLog.enabled=true +spark.sql.hive.convertMetastoreOrc=true +spark.yarn.queue=default +spark.master=local +spark.history.ui.port=18081 +spark.history.fs.cleaner.interval=7d +spark.sql.statistics.fallBackToHdfs=true +spark.yarn.historyServer.address=master:18081 +spark.sql.orc.filterPushdown=true +spark.history.provider=org.apache.spark.deploy.history.FsHistoryProvider +spark.history.fs.cleaner.maxAge=90d +spark.sql.orc.impl=native +spark.history.fs.cleaner.enabled=true + +spark.history.fs.logDirectory=seaweedfs://localhost:8888/spark2-history/ +spark.eventLog.dir=seaweedfs://localhost:8888/spark2-history/ + +spark.driver.extraClassPath=/Users/chris/go/src/github.com/chrislusf/seaweedfs/other/java/hdfs2/target/seaweedfs-hadoop2-client-1.5.0.jar +spark.executor.extraClassPath=/Users/chris/go/src/github.com/chrislusf/seaweedfs/other/java/hdfs2/target/seaweedfs-hadoop2-client-1.5.0.jar +spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem +spark.hadoop.fs.defaultFS=seaweedfs://localhost:8888 +``` + 2. Create the Spark history folder on the same filer that `spark.eventLog.dir` points to: +``` +$ curl -X POST http://localhost:8888/spark2-history/ +``` + 3. 
Run a Spark shell and read a file stored in SeaweedFS. The path below has no scheme, so it resolves against `spark.hadoop.fs.defaultFS` (`seaweedfs://localhost:8888`): +``` +$ bin/spark-shell +20/10/18 14:11:44 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address +20/10/18 14:12:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties +Setting default log level to "WARN". +To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). +Spark context Web UI available at http://192.168.2.4:4040 +Spark context available as 'sc' (master = local, app id = local-1603055539864). +Spark session available as 'spark'. +Welcome to + ____ __ + / __/__ ___ _____/ /__ + _\ \/ _ \/ _ `/ __/ '_/ + /___/ .__/\_,_/_/ /_/\_\ version 3.0.0 + /_/ + +Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_202) +Type in expressions to have them evaluated. +Type :help for more information. + +scala> sc.textFile("/buckets/large/ttt.txt").count +res0: Long = 9374 + +```