2019-08-30 09:26:50 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2019-09-29 06:17:37 +00:00
|
|
|
"flag"
|
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
|
2022-07-29 07:17:28 +00:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/backend"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
|
2019-08-30 09:26:50 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// volumePath is the -dir flag: the directory passed to storage.ScanVolumeFile
// and the directory in which the rewritten data file is created.
var volumePath = flag.String("dir", "/tmp", "data directory to store files")

// volumeCollection is the -collection flag, forwarded to
// storage.ScanVolumeFile unchanged. It defaults to the empty string.
var volumeCollection = flag.String("collection", "", "the volume collection name")

// volumeId is the -volumeId flag naming the volume to scan. It defaults to -1.
var volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
|
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
func Checksum(n *needle.Needle) string {
|
|
|
|
return fmt.Sprintf("%s%x", n.Id, n.Cookie)
|
2019-08-30 09:26:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type VolumeFileScanner4SeeDat struct {
|
2019-09-29 06:17:37 +00:00
|
|
|
version needle.Version
|
2019-12-23 20:48:20 +00:00
|
|
|
block super_block.SuperBlock
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-11-12 06:21:43 +00:00
|
|
|
dir string
|
|
|
|
hashes map[string]bool
|
|
|
|
dat *os.File
|
2019-11-29 02:33:18 +00:00
|
|
|
datBackend backend.BackendStorageFile
|
2019-08-30 09:26:50 +00:00
|
|
|
}
|
|
|
|
|
2019-12-23 20:48:20 +00:00
|
|
|
func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error {
|
2019-12-28 19:50:42 +00:00
|
|
|
scanner.version = superBlock.Version
|
2019-09-29 06:17:37 +00:00
|
|
|
scanner.block = superBlock
|
|
|
|
return nil
|
2019-08-30 09:26:50 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
|
2019-09-29 06:17:37 +00:00
|
|
|
return true
|
2019-08-30 09:26:50 +00:00
|
|
|
}
|
|
|
|
|
2019-10-22 07:50:30 +00:00
|
|
|
func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-10-29 07:35:16 +00:00
|
|
|
if scanner.datBackend == nil {
|
|
|
|
newFileName := filepath.Join(*volumePath, "dat_fixed")
|
|
|
|
newDatFile, err := os.Create(newFileName)
|
2019-09-29 06:17:37 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Fatalf("Write New Volume Data %v", err)
|
|
|
|
}
|
2019-10-30 05:37:36 +00:00
|
|
|
scanner.datBackend = backend.NewDiskFile(newDatFile)
|
2019-10-29 07:35:16 +00:00
|
|
|
scanner.datBackend.WriteAt(scanner.block.Bytes(), 0)
|
2019-08-30 09:26:50 +00:00
|
|
|
}
|
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
checksum := Checksum(n)
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
if scanner.hashes[checksum] {
|
|
|
|
glog.V(0).Infof("duplicate checksum:%s fid:%d,%s%x @ offset:%d", checksum, *volumeId, n.Id, n.Cookie, offset)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
scanner.hashes[checksum] = true
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-10-29 07:35:16 +00:00
|
|
|
_, s, _, e := n.Append(scanner.datBackend, scanner.version)
|
2019-09-29 06:17:37 +00:00
|
|
|
fmt.Printf("size %d error %v\n", s, e)
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
return nil
|
2019-08-30 09:26:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
2019-09-29 06:17:37 +00:00
|
|
|
flag.Parse()
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
vid := needle.VolumeId(*volumeId)
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
outpath, _ := filepath.Abs(filepath.Dir(os.Args[0]))
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
scanner := &VolumeFileScanner4SeeDat{
|
|
|
|
dir: filepath.Join(outpath, "out"),
|
|
|
|
hashes: map[string]bool{},
|
|
|
|
}
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
if _, err := os.Stat(scanner.dir); err != nil {
|
|
|
|
if err := os.MkdirAll(scanner.dir, os.ModePerm); err != nil {
|
|
|
|
glog.Fatalf("could not create output dir : %s", err)
|
|
|
|
}
|
|
|
|
}
|
2019-08-30 09:26:50 +00:00
|
|
|
|
2019-09-29 06:17:37 +00:00
|
|
|
err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
|
|
|
|
if err != nil {
|
|
|
|
glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
|
|
|
|
}
|
2019-08-30 09:26:50 +00:00
|
|
|
|
|
|
|
}
|