package main import ( "bufio" "code.google.com/p/weed-fs/go/glog" "code.google.com/p/weed-fs/go/operation" "code.google.com/p/weed-fs/go/util" "fmt" "io" "math" "math/rand" "os" "runtime" "runtime/pprof" "sort" "strings" "sync" "time" ) type BenchmarkOptions struct { server *string concurrency *int numberOfFiles *int fileSize *int idListFile *string write *bool read *bool sequentialRead *bool collection *string cpuprofile *string vid2server map[string]string //cache for vid locations } var ( b BenchmarkOptions ) func init() { cmdBenchmark.Run = runbenchmark // break init cycle cmdBenchmark.IsDebug = cmdBenchmark.Flag.Bool("debug", false, "verbose debug information") b.server = cmdBenchmark.Flag.String("server", "localhost:9333", "weedfs master location") b.concurrency = cmdBenchmark.Flag.Int("c", 64, "number of concurrent write or read processes") b.fileSize = cmdBenchmark.Flag.Int("size", 1024, "simulated file size in bytes") b.numberOfFiles = cmdBenchmark.Flag.Int("n", 1024*1024, "number of files to write for each thread") b.idListFile = cmdBenchmark.Flag.String("list", os.TempDir()+"/benchmark_list.txt", "list of uploaded file ids") b.write = cmdBenchmark.Flag.Bool("write", true, "enable write") b.read = cmdBenchmark.Flag.Bool("read", true, "enable read") b.sequentialRead = cmdBenchmark.Flag.Bool("readSequentially", false, "randomly read by ids from \"-list\" specified file") b.collection = cmdBenchmark.Flag.String("collection", "benchmark", "write data to this collection") b.cpuprofile = cmdBenchmark.Flag.String("cpuprofile", "", "write cpu profile to file") b.vid2server = make(map[string]string) } var cmdBenchmark = &Command{ UsageLine: "benchmark -server=localhost:9333 -c=10 -n=100000", Short: "benchmark on writing millions of files and read out", Long: `benchmark on an empty weed file system. Two tests during benchmark: 1) write lots of small files to the system 2) read the files out The file content is mostly zero, but no compression is done. By default, write 1 million files of 1KB each with 7 concurrent threads, and randomly read them out with 7 concurrent threads. You can choose to only benchmark read or write. During write, the list of uploaded file ids is stored in "-list" specified file. You can also use your own list of file ids to run read test. Write speed and read speed will be collected. The numbers are used to get a sense of the system. But usually your network or the hard drive is the real bottleneck. `, } var ( wait sync.WaitGroup writeStats *stats readStats *stats ) func runbenchmark(cmd *Command, args []string) bool { fmt.Printf("This is Weed File System version %s %s %s\n", VERSION, runtime.GOOS, runtime.GOARCH) if *b.cpuprofile != "" { f, err := os.Create(*b.cpuprofile) if err != nil { glog.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } if *b.write { bench_write() } if *b.read { bench_read() } return true } func bench_write() { fileIdLineChan := make(chan string) finishChan := make(chan bool) writeStats = newStats() idChan := make(chan int) wait.Add(*b.concurrency) go writeFileIds(*b.idListFile, fileIdLineChan, finishChan) for i := 0; i < *b.concurrency; i++ { go writeFiles(idChan, fileIdLineChan, writeStats) } writeStats.start = time.Now() go writeStats.checkProgress("Writing Benchmark", finishChan) for i := 0; i < *b.numberOfFiles; i++ { idChan <- i } close(idChan) wait.Wait() writeStats.end = time.Now() wait.Add(1) finishChan <- true finishChan <- true close(finishChan) wait.Wait() writeStats.printStats() } func bench_read() { fileIdLineChan := make(chan string) finishChan := make(chan bool) readStats = newStats() wait.Add(*b.concurrency) go readFileIds(*b.idListFile, fileIdLineChan) readStats.start = time.Now() go readStats.checkProgress("Randomly Reading Benchmark", finishChan) for i := 0; i < *b.concurrency; i++ { go readFiles(fileIdLineChan, readStats) } wait.Wait() finishChan <- true close(finishChan) readStats.end = time.Now() readStats.printStats() } func writeFiles(idChan chan int, fileIdLineChan chan string, s *stats) { serverLimitChan := make(map[string]chan bool) for { if id, ok := <-idChan; ok { start := time.Now() fp := &operation.FilePart{Reader: &FakeReader{id: uint64(id), size: int64(*b.fileSize)}, FileSize: int64(*b.fileSize)} if assignResult, err := operation.Assign(*b.server, 1, "", *b.collection); err == nil { fp.Server, fp.Fid, fp.Collection = assignResult.PublicUrl, assignResult.Fid, *b.collection if _, ok := serverLimitChan[fp.Server]; !ok { serverLimitChan[fp.Server] = make(chan bool, 7) } serverLimitChan[fp.Server] <- true if _, err := fp.Upload(0, *b.server); err == nil { fileIdLineChan <- fp.Fid s.completed++ s.transferred += int64(*b.fileSize) } else { s.failed++ } writeStats.addSample(time.Now().Sub(start)) <-serverLimitChan[fp.Server] if *cmdBenchmark.IsDebug { fmt.Printf("writing %d file %s\n", id, fp.Fid) } } else { s.failed++ println("writing file error:", err.Error()) } } else { break } } wait.Done() } func readFiles(fileIdLineChan chan string, s *stats) { serverLimitChan := make(map[string]chan bool) masterLimitChan := make(chan bool, 7) for { if fid, ok := <-fileIdLineChan; ok { if len(fid) == 0 { continue } if fid[0] == '#' { continue } if *cmdBenchmark.IsDebug { fmt.Printf("reading file %s\n", fid) } parts := strings.SplitN(fid, ",", 2) vid := parts[0] start := time.Now() if server, ok := b.vid2server[vid]; !ok { masterLimitChan <- true if ret, err := operation.Lookup(*b.server, vid); err == nil { if len(ret.Locations) > 0 { server = ret.Locations[0].PublicUrl b.vid2server[vid] = server } } <-masterLimitChan } if server, ok := b.vid2server[vid]; ok { if _, ok := serverLimitChan[server]; !ok { serverLimitChan[server] = make(chan bool, 7) } serverLimitChan[server] <- true url := "http://" + server + "/" + fid if bytesRead, err := util.Get(url); err == nil { s.completed++ s.transferred += int64(len(bytesRead)) readStats.addSample(time.Now().Sub(start)) } else { s.failed++ println("!!!! Failed to read from ", url, " !!!!!") } <-serverLimitChan[server] } else { s.failed++ println("!!!! volume id ", vid, " location not found!!!!!") } } else { break } } wait.Done() } func writeFileIds(fileName string, fileIdLineChan chan string, finishChan chan bool) { file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { glog.Fatalf("File to create file %s: %s\n", fileName, err) } defer file.Close() for { select { case <-finishChan: wait.Done() return case line := <-fileIdLineChan: file.Write([]byte(line)) file.Write([]byte("\n")) } } } func readFileIds(fileName string, fileIdLineChan chan string) { file, err := os.Open(fileName) // For read access. if err != nil { glog.Fatalf("File to read file %s: %s\n", fileName, err) } defer file.Close() r := bufio.NewReader(file) if *b.sequentialRead { for { if line, err := Readln(r); err == nil { fileIdLineChan <- string(line) } else { break } } } else { lines := make([]string, 0, *b.numberOfFiles) for { if line, err := Readln(r); err == nil { lines = append(lines, string(line)) } else { break } } for i := 0; i < *b.numberOfFiles; i++ { fileIdLineChan <- lines[rand.Intn(len(lines))] } } close(fileIdLineChan) } const ( benchResolution = 10000 //0.1 microsecond benchBucket = 1000000000 / benchResolution ) // An efficient statics collecting and rendering type stats struct { data []int overflow []int completed int failed int transferred int64 start time.Time end time.Time } var percentages = []int{50, 66, 75, 80, 90, 95, 98, 99, 100} func newStats() *stats { return &stats{data: make([]int, benchResolution), overflow: make([]int, 0)} } func (s *stats) addSample(d time.Duration) { index := int(d / benchBucket) if index < 0 { fmt.Printf("This request takes %3.1f seconds, skipping!\n", float64(index)/10000) } else if index < len(s.data) { s.data[int(d/benchBucket)]++ } else { s.overflow = append(s.overflow, index) } } func (s *stats) checkProgress(testName string, finishChan chan bool) { fmt.Printf("\n------------ %s ----------\n", testName) ticker := time.Tick(time.Second) for { select { case <-finishChan: return case <-ticker: fmt.Printf("Completed %d of %d requests, %3.1f%%\n", s.completed, *b.numberOfFiles, float64(s.completed)*100/float64(*b.numberOfFiles)) } } } func (s *stats) printStats() { timeTaken := float64(int64(s.end.Sub(s.start))) / 1000000000 fmt.Printf("\nConcurrency Level: %d\n", *b.concurrency) fmt.Printf("Time taken for tests: %.3f seconds\n", timeTaken) fmt.Printf("Complete requests: %d\n", s.completed) fmt.Printf("Failed requests: %d\n", s.failed) fmt.Printf("Total transferred: %d bytes\n", s.transferred) fmt.Printf("Requests per second: %.2f [#/sec]\n", float64(s.completed)/timeTaken) fmt.Printf("Transfer rate: %.2f [Kbytes/sec]\n", float64(s.transferred)/1024/timeTaken) n, sum := 0, 0 min, max := 10000000, 0 for i := 0; i < len(s.data); i++ { n += s.data[i] sum += s.data[i] * i if s.data[i] > 0 { if min > i { min = i } if max < i { max = i } } } n += len(s.overflow) for i := 0; i < len(s.overflow); i++ { sum += s.overflow[i] if min > s.overflow[i] { min = s.overflow[i] } if max < s.overflow[i] { max = s.overflow[i] } } avg := float64(sum) / float64(n) varianceSum := 0.0 for i := 0; i < len(s.data); i++ { if s.data[i] > 0 { d := float64(i) - avg varianceSum += d * d * float64(s.data[i]) } } for i := 0; i < len(s.overflow); i++ { d := float64(s.overflow[i]) - avg varianceSum += d * d } std := math.Sqrt(varianceSum / float64(n)) fmt.Printf("\nConnection Times (ms)\n") fmt.Printf(" min avg max std\n") fmt.Printf("Total: %2.1f %3.1f %3.1f %3.1f\n", float32(min)/10, float32(avg)/10, float32(max)/10, std/10) //printing percentiles fmt.Printf("\nPercentage of the requests served within a certain time (ms)\n") percentiles := make([]int, len(percentages)) for i := 0; i < len(percentages); i++ { percentiles[i] = n * percentages[i] / 100 } percentiles[len(percentiles)-1] = n percentileIndex := 0 currentSum := 0 for i := 0; i < len(s.data); i++ { currentSum += s.data[i] if s.data[i] > 0 && percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] { fmt.Printf(" %3d%% %5.1f ms\n", percentages[percentileIndex], float32(i)/10.0) percentileIndex++ for percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] { percentileIndex++ } } } sort.Ints(s.overflow) for i := 0; i < len(s.overflow); i++ { currentSum++ if percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] { fmt.Printf(" %3d%% %5.1f ms\n", percentages[percentileIndex], float32(s.overflow[i])/10.0) percentileIndex++ for percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] { percentileIndex++ } } } } // a fake reader to generate content to upload type FakeReader struct { id uint64 // an id number size int64 // max bytes } func (l *FakeReader) Read(p []byte) (n int, err error) { if l.size <= 0 { return 0, io.EOF } if int64(len(p)) > l.size { n = int(l.size) } else { n = len(p) } for i := 0; i < n-8; i += 8 { for s := uint(0); s < 8; s++ { p[i] = byte(l.id >> (s * 8)) } } l.size -= int64(n) return } func Readln(r *bufio.Reader) ([]byte, error) { var ( isPrefix bool = true err error = nil line, ln []byte ) for isPrefix && err == nil { line, isPrefix, err = r.ReadLine() ln = append(ln, line...) } return ln, err }