package web import ( "chat/utils" "golang.org/x/net/html" "regexp" "strings" ) var unexpected = []string{ "", "", "
", ""), 1) return strings.Split(suf, "")[0] } func SplitPagination(html string) string { pre := strings.Split(html, "
  • ")[0] return utils.TryGet(strings.Split(pre, "
    在新选项卡中打开链接
    "), 1) } func GetContent(html string) []string { re := regexp.MustCompile(`>([^<]+)<`) matches := re.FindAllString(html, -1) return FilterContent(matches) } func IsExpected(data string) bool { if IsLink(data) { return false } for _, str := range unexpected { if strings.HasPrefix(data, str) { return false } } return true } func IsLink(input string) bool { re := regexp.MustCompile(`^(https?|ftp):\/\/[^\s/$.?#].\S*$`) return re.MatchString(input) } func FilterContent(matches []string) []string { res := make([]string, 0) for _, match := range matches { source := strings.TrimSpace(match[1 : len(match)-1]) if len(source) > 0 && IsExpected(source) { res = append(res, source) } } return res }