```
$ bin/spark-submit --name spark-pi \
  ... \
  --conf spark.eventLog.dir=seaweedfs://192.168.2.3:8888/spark2-history/ \
  file:///usr/local/spark/examples/jars/spark-examples_2.12-3.0.0.jar
```
# My Example

Here is my local example with everything switched over to SeaweedFS.
1. This is my local `spark-defaults.conf`:

```
spark.eventLog.enabled=true
spark.sql.hive.convertMetastoreOrc=true
spark.yarn.queue=default
spark.master=local
spark.history.ui.port=18081
spark.history.fs.cleaner.interval=7d
spark.sql.statistics.fallBackToHdfs=true
spark.yarn.historyServer.address=master:18081
spark.sql.orc.filterPushdown=true
spark.history.provider=org.apache.spark.deploy.history.FsHistoryProvider
spark.history.fs.cleaner.maxAge=90d
spark.sql.orc.impl=native
spark.history.fs.cleaner.enabled=true

spark.history.fs.logDirectory=seaweedfs://localhost:8888/spark2-history/
spark.eventLog.dir=seaweedfs://localhost:8888/spark2-history/

spark.driver.extraClassPath=/Users/chris/go/src/github.com/chrislusf/seaweedfs/other/java/hdfs2/target/seaweedfs-hadoop2-client-1.5.0.jar
spark.executor.extraClassPath=/Users/chris/go/src/github.com/chrislusf/seaweedfs/other/java/hdfs2/target/seaweedfs-hadoop2-client-1.5.0.jar
spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem
spark.hadoop.fs.defaultFS=seaweedfs://localhost:8888
```
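To sanity-check that `spark.hadoop.fs.seaweedfs.impl` and the classpath settings above are picked up, a quick probe from `spark-shell` can help. This is a minimal sketch, assuming the filer at `localhost:8888` from the config above:

```
// Resolve the seaweedfs:// scheme through Hadoop's FileSystem API.
// Assumes the seaweedfs-hadoop2-client jar is on the classpath as configured above.
import java.net.URI
import org.apache.hadoop.fs.{FileSystem, Path}

val fs = FileSystem.get(new URI("seaweedfs://localhost:8888"), sc.hadoopConfiguration)
// Should print seaweed.hdfs.SeaweedFileSystem if fs.seaweedfs.impl took effect.
println(fs.getClass.getName)
// List the filer root as a basic connectivity check.
fs.listStatus(new Path("/")).foreach(s => println(s.getPath))
```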
2. Create the Spark history folder:

```
$ curl -X POST http://192.168.2.3:8888/spark2-history/
```
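The `curl` call asks the filer to create the folder over HTTP. If you prefer to stay inside Spark, the same folder can be created through the Hadoop API instead; a sketch, assuming the same filer address as the `curl` call:

```
// Create the history directory via the mounted SeaweedFileSystem
// rather than the filer's HTTP API.
import java.net.URI
import org.apache.hadoop.fs.{FileSystem, Path}

val fs = FileSystem.get(new URI("seaweedfs://192.168.2.3:8888"), sc.hadoopConfiguration)
fs.mkdirs(new Path("/spark2-history"))  // returns true if the directory exists afterwards
```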
3. Run a Spark shell:

```
$ bin/spark-shell
20/10/18 14:11:44 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
20/10/18 14:12:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://192.168.2.4:4040
Spark context available as 'sc' (master = local, app id = local-1603055539864).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.0.0
      /_/

Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_202)
Type in expressions to have them evaluated.
Type :help for more information.

scala> sc.textFile("/buckets/large/ttt.txt").count
res0: Long = 9374
```
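With the `spark-defaults.conf` above, `fs.defaultFS` points at SeaweedFS, so the bare path `/buckets/large/ttt.txt` resolves to `seaweedfs://localhost:8888/buckets/large/ttt.txt`. A small round-trip from the same shell can confirm writes as well; a sketch using a hypothetical scratch path `/test/roundtrip`:

```
// Write a small RDD to SeaweedFS and read it back.
// The /test/roundtrip path is just an example; any writable folder works.
val out = "seaweedfs://localhost:8888/test/roundtrip"
sc.parallelize(1 to 100).saveAsTextFile(out)
println(sc.textFile(out).count)  // expected: 100
```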