From 86995a8a79a1bcc043a82741e4bf1d2731b88df8 Mon Sep 17 00:00:00 2001
From: Zhang Minghan
Date: Tue, 12 Dec 2023 09:36:37 +0800
Subject: [PATCH] fix unicode

---
 adapter/oneapi/processor.go |  3 ++-
 utils/char.go               | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/adapter/oneapi/processor.go b/adapter/oneapi/processor.go
index 7cfd683..eb9f2ff 100644
--- a/adapter/oneapi/processor.go
+++ b/adapter/oneapi/processor.go
@@ -90,7 +90,8 @@ func getRobustnessResult(chunk string) string {
 
 	matches := compile.FindStringSubmatch(chunk)
 	if len(matches) > 1 {
-		return matches[1]
+		// Decode any \uXXXX escapes; text without escapes is returned unchanged.
+		return utils.DecodeUnicode(matches[1])
 	} else {
 		return ""
 	}
diff --git a/utils/char.go b/utils/char.go
index 83e9803..bd65c55 100644
--- a/utils/char.go
+++ b/utils/char.go
@@ -151,3 +151,26 @@ func ExtractImageUrls(data string) []string {
 	re := regexp.MustCompile(`(https?://\S+\.(?:png|jpg|jpeg|gif|webp))`)
 	return re.FindAllString(data, -1)
 }
+
+// DecodeUnicode replaces each JSON-style `\uXXXX` escape in data with the
+// character it encodes; all other text is returned unchanged. A high/low
+// surrogate pair (e.g. `\ud83d\ude00`) is combined into a single code point.
+func DecodeUnicode(data string) string {
+	re := regexp.MustCompile(`\\u[dD][89abAB][0-9a-fA-F]{2}\\u[dD][c-fC-F][0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}`)
+	return re.ReplaceAllStringFunc(data, func(s string) string {
+		// The pattern guarantees `\u` plus four hex digits, followed by a
+		// second escape only when the two form a surrogate pair (12 bytes).
+		val, err := strconv.ParseInt(s[2:6], 16, 32)
+		if err != nil {
+			return s
+		}
+		if len(s) == 12 {
+			low, err := strconv.ParseInt(s[8:], 16, 32)
+			if err != nil {
+				return s
+			}
+			val = 0x10000 + ((val - 0xD800) << 10) + (low - 0xDC00)
+		}
+		return string(rune(val))
+	})
+}