Improve MangaDex string sanitization; Fixes #1020 (#1537)

Improve MangaDex string sanitization; Fixes #1020
This commit is contained in:
waicool20 2019-09-21 02:58:07 +08:00 committed by arkon
parent a685757575
commit a096994319
1 changed files with 14 additions and 9 deletions

View File

@ -31,9 +31,9 @@ import okhttp3.HttpUrl
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
import okhttp3.Request import okhttp3.Request
import okhttp3.Response import okhttp3.Response
import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.parser.Parser
import rx.Observable import rx.Observable
import uy.kohesive.injekt.Injekt import uy.kohesive.injekt.Injekt
import uy.kohesive.injekt.api.get import uy.kohesive.injekt.api.get
@ -352,7 +352,7 @@ open class Mangadex(override val lang: String, private val internalLang: String,
val json = JsonParser().parse(jsonData).asJsonObject val json = JsonParser().parse(jsonData).asJsonObject
val mangaJson = json.getAsJsonObject("manga") val mangaJson = json.getAsJsonObject("manga")
val chapterJson = json.getAsJsonObject("chapter") val chapterJson = json.getAsJsonObject("chapter")
manga.title = mangaJson.get("title").string manga.title = cleanString(mangaJson.get("title").string)
manga.thumbnail_url = cdnUrl + mangaJson.get("cover_url").string manga.thumbnail_url = cdnUrl + mangaJson.get("cover_url").string
manga.description = cleanString(mangaJson.get("description").string) manga.description = cleanString(mangaJson.get("description").string)
manga.author = mangaJson.get("author").string manga.author = mangaJson.get("author").string
@ -374,13 +374,18 @@ open class Mangadex(override val lang: String, private val internalLang: String,
return manga return manga
} }
// Remove bbcode tags as well as parses any html characters in description or chapter name to actual characters for example &hearts will show a heart // Remove bbcode tags as well as parses any html characters in description or chapter name to actual characters for example &hearts; will show ♥
private fun cleanString(description: String): String { private fun cleanString(string: String): String {
return Jsoup.parseBodyFragment(description val bbRegex = """\[(\w+)[^]]*](.*?)\[/\1]""".toRegex()
.replace("[list]", "") var intermediate = string
.replace("[/list]", "") .replace("[list]", "")
.replace("[*]", "") .replace("[/list]", "")
.replace("""\[(\w+)[^\]]*](.*?)\[/\1]""".toRegex(), "$2")).text() .replace("[*]", "")
// Recursively remove nested bbcode
while (bbRegex.containsMatchIn(intermediate)) {
intermediate = intermediate.replace(bbRegex, "$2")
}
return Parser.unescapeEntities(intermediate, false)
} }
override fun mangaDetailsParse(document: Document) = throw Exception("Not Used") override fun mangaDetailsParse(document: Document) = throw Exception("Not Used")