fix unicode

This commit is contained in:
Zhang Minghan 2023-12-12 09:36:37 +08:00
parent 0d45c5bc0b
commit 86995a8a79
2 changed files with 19 additions and 1 deletions

View File

@ -90,7 +90,11 @@ func getRobustnessResult(chunk string) string {
matches := compile.FindStringSubmatch(chunk)
if len(matches) > 1 {
return matches[1]
partial := matches[1]
// if is the unicode character
if strings.HasPrefix(partial, "\\u") {
return utils.DecodeUnicode(partial)
}
} else {
return ""
}

View File

@ -151,3 +151,17 @@ func ExtractImageUrls(data string) []string {
re := regexp.MustCompile(`(https?://\S+\.(?:png|jpg|jpeg|gif|webp))`)
return re.FindAllString(data, -1)
}
// unicodeEscapeRe matches either a UTF-16 surrogate pair written as two
// consecutive \uXXXX escapes, or a single \uXXXX escape. The pair alternative
// is listed first so it is preferred whenever both could match at a position.
var unicodeEscapeRe = regexp.MustCompile(
	`\\u[dD][89abAB][0-9a-fA-F]{2}\\u[dD][c-fC-F][0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}`,
)

// DecodeUnicode replaces every literal \uXXXX escape sequence in data with the
// character it denotes and returns the resulting string.
//
// A high surrogate immediately followed by a low surrogate (for example
// \ud83d\ude00) is combined into the single code point the pair encodes.
// A surrogate that is not part of such a pair cannot be represented as valid
// UTF-8, so it is left in the output exactly as written. Text that does not
// form a complete escape (such as \u12) is likewise returned unchanged.
func DecodeUnicode(data string) string {
	return unicodeEscapeRe.ReplaceAllStringFunc(data, func(s string) string {
		// s is either 6 bytes (\uXXXX) or 12 bytes (\uXXXX\uXXXX); the hex
		// digits of the first escape always sit at s[2:6].
		hi, err := strconv.ParseUint(s[2:6], 16, 32)
		if err != nil {
			return s
		}

		if len(s) == 12 {
			lo, err := strconv.ParseUint(s[8:12], 16, 32)
			if err != nil {
				return s
			}
			// Standard UTF-16 surrogate pair decoding.
			return string(rune(0x10000 + (hi-0xD800)<<10 + (lo - 0xDC00)))
		}

		// A lone surrogate would become U+FFFD if converted; keep the
		// original escape text instead of silently losing information.
		if hi >= 0xD800 && hi <= 0xDFFF {
			return s
		}

		return string(rune(hi))
	})
}