How can I move files under a folder into subfolders by using channel or goroutine

Issue

I have a folder which contains multiple types of files (with no subfolders in this simple case). Let’s assume it contains 20000 .raw files and 20000 .jpg files. I need to move .raw files into raw folder and .jpg files into jpg folder. So I tired to use golang to solve it:

package main

import (
    "flag"
    "fmt"
    "io/fs"
    "io/ioutil"
    "os"
    "runtime"
    "strings"
    "sync"
    "time"
)

func CreateFolder(basePath string, folderName string) {
    os.Mkdir(basePath+"/"+folderName, 0755)
}

func MoveFile(file string, path string, folder string) {
    err := os.Rename(path+"/"+file, path+"/"+folder+"/"+file)
    if err != nil {
        panic(err)
    }
}

func getInfo(a fs.FileInfo, c chan string) {
    if a.IsDir() || strings.HasPrefix(a.Name(), ".") {
        return
    } else {
        c <- a.Name()
    }
}

func dealInfo(path string, typeDict *sync.Map, c chan string) {
    for name := range c {
        sp := strings.Split(name, ".")
        suffix := sp[len(sp)-1]

        if _, ok := typeDict.Load(suffix); ok {
            MoveFile(name, path, suffix)
        } else {
            CreateFolder(path, suffix)
            MoveFile(name, path, suffix)
            typeDict.Store(suffix, 1)
        }
    }
}

func main() {
    runtime.GOMAXPROCS(8)
    var (
        filepath = flag.String("p", "", "default self folder")
    )

    flag.Parse()
    fmt.Println(*filepath)
    fmt.Println("==========")
    if *filepath == "" {
        fmt.Println("No valid folder path")
        return
    } else {
        fileinfos, err := ioutil.ReadDir(*filepath)
        stime := time.Now()
        if err != nil {
            panic(err)
        }
        var typeDict sync.Map
        ch := make(chan string, 20)

        for _, fs := range fileinfos {
            go getInfo(fs, ch)
            go dealInfo(*filepath, &typeDict, ch)
        }
        fmt.Println(time.Since(stime))
    }
}

But it returns an error: runtime: failed to create new OS thread. I guess this is due to too much goroutines the script created? But I’ve no idea why this could happen because I think ch := make(chan string, 20) would limit the number of goroutine.

I also tried to use wg *sync.WaitGroup, like:


getInfo(...) // use this func to put all files info into a channel

wg.Add(20)

for i:=0; i<20; i++ {
    go dealInfo(..., &wg)  // this new dealInfo contains wg.Done()
}

wg.Wait()

But this will cause a deadlock error.

May I know the best way to move files parallel please? Your help is really appreciated!

Solution

This may work.

However the move operation depends on the Operational System and the Filesystem.

Doing it on parallel may not be optimal via NFS for instance. You must check.

The strategy of list the files, send to channels to be executed (move/rename) by some goroutines is something that I will try in this situation.

The number of goroutines (workers) can be a command line parameter.

Answered By – Tiago Peczenyj

Answer Checked By – Willingham (GoLangFix Volunteer)

Leave a Reply

Your email address will not be published.