diff --git a/docs/distributed_filer.rst b/docs/distributed_filer.rst index 29055c73e..ad9f85d5c 100644 --- a/docs/distributed_filer.rst +++ b/docs/distributed_filer.rst @@ -10,7 +10,11 @@ However, no SPOF is a must-have requirement for many projects. Luckily, SeaweedFS is so flexible that we can use a completely different way to manage file metadata. -This distributed filer uses Cassandra to store the metadata. +This distributed filer uses Redis or Cassandra to store the metadata. + +Redis Setup +##################### +No extra setup is required; just have a Redis server running. Cassandra Setup ##################### @@ -37,7 +41,14 @@ For production server, you would want to set replication_factor to 3. Sample usage ##################### -To start a weed filer in distributed mode: +To start a weed filer in distributed mode with Redis: + +.. code-block:: bash + + # assuming you already started weed master and weed volume + weed filer -redis.server=localhost:6379 + +To start a weed filer in distributed mode with Cassandra: .. code-block:: bash @@ -57,8 +68,8 @@ Now you can add/delete files Limitation ############ -List sub folders and files are not supported because Cassandra does not support -prefix search. +Listing sub folders and files is not supported because neither Redis nor +Cassandra supports prefix search. Flat Namespace Design ############ @@ -73,16 +84,16 @@ A flat namespace would take more space because the parent directories are repeatedly stored. But disk space is a lesser concern especially for distributed systems. -The Cassandra table is a simple file_full_path ~ file_id mapping. Actually -it is a file_full_path ~ list_of_file_ids mapping with the hope to support -easy file appending for streaming files. +So either the Redis or the Cassandra store is a simple file_full_path ~ file_id mapping. +(Actually the Cassandra table is a file_full_path ~ list_of_file_ids mapping, +with the hope to support easy file appending for streaming files.) 
Complexity ################### For one file retrieval, the full_filename=>file_id lookup will be O(logN) -using Cassandra. But very likely the one additional network hop would -take longer than the Cassandra internal lookup. +using Cassandra, or O(1) using Redis. But very likely the one additional network hop +would take longer than the actual lookup. Use Cases ######################### @@ -93,7 +104,7 @@ read files via HTTP POST directly. Future ################### -SeaweedFS can support additional distributed databases. It will be better +SeaweedFS can support other distributed databases. It would be better if that database can support prefix search, in order to list files under a directory. diff --git a/go/filer/redis_store/redis_store.go b/go/filer/redis_store/redis_store.go new file mode 100644 index 000000000..8fe1c7ca5 --- /dev/null +++ b/go/filer/redis_store/redis_store.go @@ -0,0 +1,48 @@ +package redis_store + +import ( + redis "gopkg.in/redis.v2" +) + +type RedisStore struct { + Client *redis.Client +} + +func NewRedisStore(hostPort string, database int) *RedisStore { + client := redis.NewTCPClient(&redis.Options{ + Addr: hostPort, + Password: "", // no password set + DB: int64(database), + }) + return &RedisStore{Client: client} +} + +func (s *RedisStore) Get(fullFileName string) (fid string, err error) { + fid, err = s.Client.Get(fullFileName).Result() + if err == redis.Nil { + err = nil + } + return fid, err +} +func (s *RedisStore) Put(fullFileName string, fid string) (err error) { + _, err = s.Client.Set(fullFileName, fid).Result() + if err == redis.Nil { + err = nil + } + return err +} + +// Currently the fid is not returned +func (s *RedisStore) Delete(fullFileName string) (fid string, err error) { + _, err = s.Client.Del(fullFileName).Result() + if err == redis.Nil { + err = nil + } + return "", err +} + +func (c *RedisStore) Close() { + if c.Client != nil { + c.Client.Close() + } +} diff --git a/go/weed/filer.go b/go/weed/filer.go index ce4bdf4b9..bc528be33 
100644 --- a/go/weed/filer.go +++ b/go/weed/filer.go @@ -24,6 +24,8 @@ type FilerOptions struct { redirectOnRead *bool cassandra_server *string cassandra_keyspace *string + redis_server *string + redis_database *int } func init() { @@ -36,6 +38,8 @@ func init() { f.redirectOnRead = cmdFiler.Flag.Bool("redirectOnRead", false, "whether proxy or redirect to volume server during file GET request") f.cassandra_server = cmdFiler.Flag.String("cassandra.server", "", "host[:port] of the cassandra server") f.cassandra_keyspace = cmdFiler.Flag.String("cassandra.keyspace", "seaweed", "keyspace of the cassandra server") + f.redis_server = cmdFiler.Flag.String("redis.server", "", "host:port of the redis server, e.g., 127.0.0.1:6379") + f.redis_database = cmdFiler.Flag.Int("redis.database", 0, "the database on the redis server") } var cmdFiler = &Command{ @@ -70,6 +74,7 @@ func runFiler(cmd *Command, args []string) bool { _, nfs_err := weed_server.NewFilerServer(r, *f.port, *f.master, *f.dir, *f.collection, *f.defaultReplicaPlacement, *f.redirectOnRead, *f.cassandra_server, *f.cassandra_keyspace, + *f.redis_server, *f.redis_database, ) if nfs_err != nil { glog.Fatalf(nfs_err.Error()) diff --git a/go/weed/server.go b/go/weed/server.go index c5be89927..062b02477 100644 --- a/go/weed/server.go +++ b/go/weed/server.go @@ -160,6 +160,7 @@ func runServer(cmd *Command, args []string) bool { _, nfs_err := weed_server.NewFilerServer(r, *filerOptions.port, *filerOptions.master, *filerOptions.dir, *filerOptions.collection, *filerOptions.defaultReplicaPlacement, *filerOptions.redirectOnRead, "", "", + "", 0, ) if nfs_err != nil { glog.Fatalf(nfs_err.Error()) diff --git a/go/weed/weed_server/filer_server.go b/go/weed/weed_server/filer_server.go index 0ac1ab24b..18a02b5e0 100644 --- a/go/weed/weed_server/filer_server.go +++ b/go/weed/weed_server/filer_server.go @@ -8,6 +8,7 @@ import ( "github.com/chrislusf/weed-fs/go/filer/cassandra_store" 
"github.com/chrislusf/weed-fs/go/filer/embedded_filer" "github.com/chrislusf/weed-fs/go/filer/flat_namespace" + "github.com/chrislusf/weed-fs/go/filer/redis_store" "github.com/chrislusf/weed-fs/go/glog" ) @@ -23,6 +24,7 @@ type FilerServer struct { func NewFilerServer(r *http.ServeMux, port int, master string, dir string, collection string, replication string, redirectOnRead bool, cassandra_server string, cassandra_keyspace string, + redis_server string, redis_database int, ) (fs *FilerServer, err error) { fs = &FilerServer{ master: master, @@ -32,19 +34,23 @@ func NewFilerServer(r *http.ServeMux, port int, master string, dir string, colle port: ":" + strconv.Itoa(port), } - if cassandra_server == "" { + // Metadata store precedence: Cassandra if configured, else Redis, else the embedded filer. + if cassandra_server != "" { + cassandra_store, err := cassandra_store.NewCassandraStore(cassandra_keyspace, cassandra_server) + if err != nil { + glog.Fatalf("Can not connect to cassandra server %s with keyspace %s: %v", cassandra_server, cassandra_keyspace, err) + } + fs.filer = flat_namespace.NewFlatNamesapceFiler(master, cassandra_store) + } else if redis_server != "" { + redis_store := redis_store.NewRedisStore(redis_server, redis_database) + fs.filer = flat_namespace.NewFlatNamesapceFiler(master, redis_store) + } else { if fs.filer, err = embedded_filer.NewFilerEmbedded(master, dir); err != nil { - glog.Fatalf("Can not start filer in dir %s : %v", err) + glog.Fatalf("Can not start filer in dir %s : %v", dir, err) return } r.HandleFunc("/admin/mv", fs.moveHandler) - } else { - cassandra_store, err := cassandra_store.NewCassandraStore(cassandra_keyspace, cassandra_server) - if err != nil { - glog.Fatalf("Can not connect to cassandra server %s with keyspace %s: %v", cassandra_server, cassandra_keyspace, err) - } - fs.filer = flat_namespace.NewFlatNamesapceFiler(master, cassandra_store) } r.HandleFunc("/", fs.filerHandler)