Why do I get the runtime error index out of range, is it a bad use of Slice or Map?

Issue

I have been writing Go for about a week now.
I am trying to scrape the BBS text and save it to DB.

Running the following code will cause runtime error : index out of range.

panic: runtime error: index out of range [1] with length 1

goroutine 1 [running]: main.ThreadGetText({0xc000297ea0, 0x4d},
{0xc000101400, 0x32, 0x40}, {0x0, 0x0, 0x0})
/workspace/godev/baku_go/main.go:158 +0x911 main.main()
/workspace/godev/baku_go/main.go:77 +0x705 exit status 2

I thought it might be the length or capacity of the slice, so I tried 100000 at a time, but it did not work.

I also thought that the initialization location of Slice and Map might be bad, so I tried placing them under import, but it did not work.

package main

import (
    "fmt"
    "log"
    "net/http"
    "regexp"
    "strconv"
    "time"

    "github.com/PuerkitoBio/goquery"
)

const (
    BASE_URL           = "https://bakusai.com/"
    BASE_THREADTOP_URL = "thr_tl/acode=3/ctgid=134/bid=4313/"
)

var (
    get_ichiran_count  int = 0
    maxThreadPageCount int = 1
    thread_urls        []string
    next_url           string
)

type thread_info struct {
    Shopname string
    Url      string
    Text     [][]map[int]string
}

func main() {

    appendtext := make([][]map[int]string, 0)
    now := time.Now()
    thread_urls := Threadichiran(BASE_URL + BASE_THREADTOP_URL)
    next_url := ThreadichiranNextURL(BASE_URL + BASE_THREADTOP_URL)
    fmt.Println("[main]:", thread_urls, "Thread Count:", len(thread_urls))

    for {
        thread_urls = Threadichiran(BASE_URL + next_url)
        time.Sleep(1)
        next_url = ThreadichiranNextURL(BASE_URL + next_url)
        time.Sleep(1)
        fmt.Println("[main] forloop", thread_urls, "Thread Count:", len(thread_urls))

        if next_url == "" {
            break
        } else if get_ichiran_count >= maxThreadPageCount {
            break
        } else if len(next_url) == 0 {
            break
        }
    }

    for _, u := range thread_urls {
        comm_map, _, _ := ThreadGetText(BASE_URL + u)
        time.Sleep(1)
        np := ThreadGetNext(BASE_URL + u)
        time.Sleep(1)
        fmt.Println("[main]", comm_map)
        appendtext = append(appendtext, comm_map)
        if comm_map == nil {
            break
        } else if len(comm_map) == 0 {
            break
        }

        for {
            comm_map, shop_title, thread_parse_url := ThreadGetText(BASE_URL + np)
            appendtext = append(appendtext, comm_map)
            shop_info := thread_info{Shopname: shop_title, Url: thread_parse_url, Text: appendtext}
            fmt.Println("[shop_info]:", shop_info)
            np = ThreadGetNext(BASE_URL + np)
            if np == "" {
                break
            } else if len(np) == 0 {
                break
            }
        }
    }
    fmt.Printf("Time: %vms\n", time.Since(now).Seconds())
}

func Threadichiran(turl string) []string {

    res, err := http.Get(turl)
    if err != nil {
        log.Fatal(err)
    }
    defer res.Body.Close()
    if res.StatusCode != 200 {
        log.Fatalf(res.Status)
    }

    response, err := goquery.NewDocumentFromReader(res.Body)
    if err != nil {
        log.Fatal(err)
    }

    threads := response.Find("div.lSideColumn")
    threads.Find("a").Each(func(index int, item *goquery.Selection) {
        thread_title := item.Text()
        href, _ := item.Attr("href")
        fmt.Println(thread_title)
        thread_urls = append(thread_urls, href)
    })

    return thread_urls
}

func ThreadGetText(thread_parse_url string) ([]map[int]string, string, string) {
    res, err := http.Get(thread_parse_url)

    if err != nil {
        log.Fatal(err)
    }
    defer res.Body.Close()

    if res.StatusCode != 200 {
        log.Fatalf(res.Status)
    }
    response, err := goquery.NewDocumentFromReader(res.Body)
    if err != nil {
        log.Fatal(err)
    }

    var comm []string
    var m_comm []map[int]string

    shop_title := response.Find(".title_thr_wrap ").Text()
    comment := response.Find(".article")

    comment.Each(func(index int, item *goquery.Selection) {
        comment := item.Text()
        comm = append(comm, comment)
        fmt.Println(comm)
    })

    for _, cc := range comm {
        r_number := regexp.MustCompile(`(\d{1,4})`)
        r_time := regexp.MustCompile(`([0-9]{4}/[0-9]{2}/[0-9]{2}\ [0-9]{2}:[0-9]{2})`)
        res_number := r_number.FindString(cc)
        res_time := r_time.FindString(cc)

        sub := regexp.MustCompile(` `)
        split := sub.Split(cc, -1)
        r_time_delete := regexp.MustCompile(`([0-9]{2}:[0-9]{2})`)
        r_tokumei_delete := regexp.MustCompile(`(\[匿名さん\])`)
        res_timedelete_text := r_time_delete.ReplaceAllString(split[1], "")
        if res_timedelete_text == "" {
            res_timedelete_text = "."
        }
        res_time_tokumeidelete_text := r_tokumei_delete.ReplaceAllString(res_timedelete_text, "")
        if res_time_tokumeidelete_text == "" {
            res_time_tokumeidelete_text = "."
        }
        fmt.Println(split)
        fmt.Println(res_time_tokumeidelete_text)
        fmt.Println(res_number, res_time)
        res_mix := res_time + "," + res_time_tokumeidelete_text

        res_number_convert, _ := strconv.Atoi(res_number)
        fmt.Println(res_mix)
        fmt.Println(res_number_convert)

        m_comm = append(m_comm, map[int]string{res_number_convert: res_mix})
        fmt.Println(m_comm)
    }

    return m_comm, shop_title, thread_parse_url
}

func ThreadGetNext(thread_parse_url string) string {
    res, err := http.Get(thread_parse_url)
    if err != nil {
        log.Fatal(err)
    }
    defer res.Body.Close()
    if res.StatusCode != 200 {
        log.Fatalf(res.Status)
    }

    response, err := goquery.NewDocumentFromReader(res.Body)
    if err != nil {
        log.Fatal(err)
    }
    thread_next_page, exist := response.Find(".paging_nextlink_btn > a").Attr("href")

    if exist {
        fmt.Println("[ThreadGetNext]", thread_next_page)
    } else if exist == false {
        return ""
    }
    return thread_next_page
}

func ThreadichiranNextURL(nexts string) string {

    res, err := http.Get(nexts)
    if err != nil {
        log.Fatal(err)
    }
    defer res.Body.Close()
    if res.StatusCode != 200 {
        log.Fatalf(res.Status)
    }

    response, err := goquery.NewDocumentFromReader(res.Body)
    if err != nil {
        log.Fatal(err)
    }
    next_url, exist := response.Find(".paging_nextlink_btn > a").Attr("href")

    if exist {
        fmt.Println("[ThradnextURL]", next_url)
        get_ichiran_count++
        fmt.Println("Count:", get_ichiran_count)
    }
    return next_url
}

I found a similar question, so I added the following code to see if there was a value in the error code line in reference to this one. but no change.

Golang panic: runtime error: index out of range

res_timedelete_text := r_time_delete.ReplaceAllString(split[1], "") // 158
if res_timedelete_text == "" {
    res_timedelete_text = "."
}
res_time_tokumeidelete_text := r_tokumei_delete.ReplaceAllString(res_timedelete_text, "")
if res_time_tokumeidelete_text == "" {
    res_time_tokumeidelete_text = "."
}

What is the cause of the runtime error?
Why is there a goroutine error notation if I am not using goroutine?
Is it better to use pointers?

Solution

It’s perfectly simple:

split := sub.Split(cc, -1)
res_timedelete_text := r_time_delete.ReplaceAllString(split[1], "")

You’re seemingly splitting values on a regular expression matching spaces. At no point are you checking how many parts you’ve split cc in to.

As the docs show, the return value is of the type []string, so you can check how many parts you end up with by checking len(split).

The error message actually tells you that split is a slice of strings with a length of 1. Slices being zero indexed, that means the first element of the slice is accessible through split[0]. You’re using split[1], which results in the error:

panic: runtime error: index out of range [1] with length 1

Change it to split[0] and it’ll work. To be safe, you should check for an empty slice, and perhaps consider handling split having more than 1 element. Something like:

split := sub.Split(cc, -1)
if len(split) == 0 {
    continue // skip
}
if len(split) > 1 {
    // handle split[0:len(split)-1]
}
resTimeDeleteTxt := rTimeDelete.ReplaceAllString(split[0], "")

Or just:

for _, spl := range split {
    resTimeDeleteTxt = rTimeDelete.ReplaceAllString(spl, "")
}

Something like that. It’s not quite clear what you’re trying to do if I’m honest. There’s a ton of regular expressions being applied, which is something that I would argue is code smell. As I state in my about section in my profile:

If your solution relies on more than 3 regular expressions at any given time, you’re part of the problem.

I would suggest you finish up, write up a clear description of what you’re trying to accomplish, and submit the code for review here

Answered By – Elias Van Ootegem

Answer Checked By – Mildred Charles (GoLangFix Admin)

Leave a Reply

Your email address will not be published.