mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-01-19 02:48:24 +00:00
Updated run Spark on SeaweedFS (markdown)
parent
0401227605
commit
639a1d64ee
|
@ -57,3 +57,61 @@ $ bin/spark-submit --name spark-pi \
|
||||||
file:///usr/local/spark/examples/jars/spark-examples_2.12-3.0.0.jar
|
file:///usr/local/spark/examples/jars/spark-examples_2.12-3.0.0.jar
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# My Example
|
||||||
|
Here is my local example, with Spark's default file system, event log, and history directory all switched to SeaweedFS.
|
||||||
|
1. This is my local `spark-defaults.conf`:
|
||||||
|
```
|
||||||
|
spark.eventLog.enabled=true
|
||||||
|
spark.sql.hive.convertMetastoreOrc=true
|
||||||
|
spark.yarn.queue=default
|
||||||
|
spark.master=local
|
||||||
|
spark.history.ui.port=18081
|
||||||
|
spark.history.fs.cleaner.interval=7d
|
||||||
|
spark.sql.statistics.fallBackToHdfs=true
|
||||||
|
spark.yarn.historyServer.address=master:18081
|
||||||
|
spark.sql.orc.filterPushdown=true
|
||||||
|
spark.history.provider=org.apache.spark.deploy.history.FsHistoryProvider
|
||||||
|
spark.history.fs.cleaner.maxAge=90d
|
||||||
|
spark.sql.orc.impl=native
|
||||||
|
spark.history.fs.cleaner.enabled=true
|
||||||
|
|
||||||
|
spark.history.fs.logDirectory=seaweedfs://localhost:8888/spark2-history/
|
||||||
|
spark.eventLog.dir=seaweedfs://localhost:8888/spark2-history/
|
||||||
|
|
||||||
|
spark.driver.extraClassPath=/Users/chris/go/src/github.com/chrislusf/seaweedfs/other/java/hdfs2/target/seaweedfs-hadoop2-client-1.5.0.jar
|
||||||
|
spark.executor.extraClassPath=/Users/chris/go/src/github.com/chrislusf/seaweedfs/other/java/hdfs2/target/seaweedfs-hadoop2-client-1.5.0.jar
|
||||||
|
spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem
|
||||||
|
spark.hadoop.fs.defaultFS=seaweedfs://localhost:8888
|
||||||
|
```
|
||||||
|
2. Create the Spark history folder:
|
||||||
|
```
|
||||||
|
$ curl -X POST http://192.168.2.3:8888/spark2-history/
|
||||||
|
```
|
||||||
|
3. Run a Spark shell:
|
||||||
|
```
|
||||||
|
$ bin/spark-shell
|
||||||
|
20/10/18 14:11:44 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
|
||||||
|
20/10/18 14:12:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
|
||||||
|
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
|
||||||
|
Setting default log level to "WARN".
|
||||||
|
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
|
||||||
|
Spark context Web UI available at http://192.168.2.4:4040
|
||||||
|
Spark context available as 'sc' (master = local, app id = local-1603055539864).
|
||||||
|
Spark session available as 'spark'.
|
||||||
|
Welcome to
|
||||||
|
      ____              __
|
||||||
|
     / __/__  ___ _____/ /__
|
||||||
|
    _\ \/ _ \/ _ `/ __/  '_/
|
||||||
|
   /___/ .__/\_,_/_/ /_/\_\   version 3.0.0
|
||||||
|
      /_/
|
||||||
|
|
||||||
|
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_202)
|
||||||
|
Type in expressions to have them evaluated.
|
||||||
|
Type :help for more information.
|
||||||
|
|
||||||
|
scala> sc.textFile("/buckets/large/ttt.txt").count
|
||||||
|
res0: Long = 9374
|
||||||
|
|
||||||
|
```
|
||||||
|
|
Loading…
Reference in a new issue