arangodb s3 bucket name compatibility (#3588)

* Update arangodb_store.go

* update readme, properly escape queries, add name patching

* use underscore

* use underscore

* better comment

* fix readme

Co-authored-by: a <a@a.a>
This commit is contained in:
gfx 2022-09-09 11:43:42 -05:00 committed by GitHub
parent 10d545060f
commit 48db56ddad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 7 deletions

View file

@ -66,8 +66,8 @@ func (store *ArangodbStore) Initialize(configuration util.Configuration, prefix
}
func (store *ArangodbStore) connection(uris []string, user string, pass string, insecure bool) (err error) {
ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cn := context.WithTimeout(context.Background(), 10*time.Second)
defer cn()
store.connect, err = http.NewConnection(http.ConnectionConfig{
Endpoints: uris,
TLSConfig: &tls.Config{
@ -274,10 +274,10 @@ func (store *ArangodbStore) DeleteFolderChildren(ctx context.Context, fullpath u
for d in %s
filter starts_with(d.directory, "%s/") || d.directory == "%s"
remove d._key in %s`,
targetCollection.Name(),
"`"+targetCollection.Name()+"`",
strings.Join(strings.Split(string(fullpath), "/"), ","),
string(fullpath),
targetCollection.Name(),
"`"+targetCollection.Name()+"`",
)
cur, err := store.database.Query(ctx, query, nil)
if err != nil {
@ -296,7 +296,7 @@ func (store *ArangodbStore) ListDirectoryPrefixedEntries(ctx context.Context, di
if err != nil {
return lastFileName, err
}
query := "for d in " + targetCollection.Name()
query := "for d in " + "`" + targetCollection.Name() + "`"
if includeStartFile {
query = query + " filter d.name >= \"" + startFileName + "\" "
} else {

View file

@ -98,8 +98,26 @@ func (store *ArangodbStore) ensureBucket(ctx context.Context, bucket string) (bc
return store.buckets[bucket], nil
}
// bucketToCollectionName rewrites an s3 bucket name into a name that can be
// used as an arangodb collection name. An empty input yields an empty result.
func bucketToCollectionName(s string) string {
	if s == "" {
		return ""
	}
	// "." is swapped for "_" throughout the name
	name := strings.ReplaceAll(s, ".", "_")
	// names that now lead with a digit, "_" or "-" receive a fixed prefix;
	// a leading "." cannot occur here because it was already replaced above
	switch first := name[0]; {
	case first >= '0' && first <= '9', first == '_', first == '-':
		return "xN--" + name
	}
	return name
}
// creates collection if not exist, ensures indices if not exist
func (store *ArangodbStore) ensureCollection(ctx context.Context, name string) (c driver.Collection, err error) {
func (store *ArangodbStore) ensureCollection(ctx context.Context, bucket_name string) (c driver.Collection, err error) {
// convert the bucket to collection name
name := bucketToCollectionName(bucket_name)
ok, err := store.database.CollectionExists(ctx, name)
if err != nil {
return

View file

@ -22,6 +22,39 @@ i test using this dev database:
`docker run -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/arangodb:3.9.0`
## database structure
arangodb has a few restrictions which require the use of a few tricks in order to losslessly store the data.
### filer store
arangodb does not support []byte, and will store it as a uint64 array. this would be a waste of space. to counteract this, we store the data as a length-prefixed uint64 byteset.
### filer kv
same as above
### filer buckets
s3 buckets are implemented as arangodb collections. this allows us to do very fast bucket deletion by simply deleting the collection.
arangodb collection names are limited to the character set `azAZ09_-` with a 256 character max. however, the first character must be a letter.
s3 bucket names are limited to the character set `azAZ09.-` with a 63 character max.
the rules for deriving a collection name from a bucket name are then the following:
1. if the bucket name is a valid arangodb collection name, then nothing is done.
2. if the bucket name contains a ".", the "." is replaced with "_"
3. if the bucket name now begins with a number, "_", or "-", the prefix "xN--" is prepended to the collection name
this allows for these collection names to be used.
## features i don't personally need but are missing
[ ] provide tls cert to arango
[ ] authentication that is not basic auth