Update Japscan extension to improve chapter URL extraction after the new obfuscation techniques (#10807)

* Update Japscan extension to improve chapter URL extraction and error handling

* Fix lint errors
This commit is contained in:
C0NTR1BUT0R 2025-10-04 22:46:45 +02:00 committed by Draff
parent 6a29aa1afd
commit 2928fc45a6
Signed by: Draff
GPG Key ID: E8A89F3211677653
2 changed files with 42 additions and 7 deletions

View File

@ -1,7 +1,7 @@
ext { ext {
extName = 'Japscan' extName = 'Japscan'
extClass = '.Japscan' extClass = '.Japscan'
extVersionCode = 52 extVersionCode = 53
} }
apply from: "$rootDir/common.gradle" apply from: "$rootDir/common.gradle"

View File

@ -227,15 +227,50 @@ class Japscan : ConfigurableSource, ParsedHttpSource() {
// Those have a span.badge "SPOILER" or "RAW". The additional pseudo selector makes sure to exclude these from the chapter list. // Those have a span.badge "SPOILER" or "RAW". The additional pseudo selector makes sure to exclude these from the chapter list.
override fun chapterFromElement(element: Element): SChapter { override fun chapterFromElement(element: Element): SChapter {
val urlElement = element.selectFirst("*[href~=manga|manhua|manhwa]") // Only search for a tag with any attribute containing manga/manhua/manhwa
if (urlElement == null) { val urlPairs = element.getElementsByTag("a")
.mapNotNull { el ->
// Find the first attribute whose value matches the chapter URL pattern
val attrMatch = el.attributes().asList().firstOrNull { attr ->
val value = attr.value
value.startsWith("/manga/") || value.startsWith("/manhua/") || value.startsWith("/manhwa/")
}
if (attrMatch != null) {
val name = el.ownText().ifBlank { el.text() }
// Mark if the attribute is not "href"
val isNonHref = attrMatch.key != "href"
Triple(name, attrMatch.value, isNonHref)
} else {
null
}
}
.distinctBy { it.second }
.sortedWith(
compareByDescending<Triple<String, String, Boolean>> { it.third }
.thenBy { it.second.length },
) // Prefer non-href first, then shorter URLs
.map { Pair(it.first, it.second) }
var foundPair: Pair<String, String>? = urlPairs.firstOrNull()
// var log = urlPairs.size.toString() + " URLs found:\n"
// for ((name, url) in urlPairs) {
// val testUrl = internalBaseUrl + url
// val response = client.newCall(GET(testUrl, headers)).execute()
// log += "$name: $testUrl => ${response}\n"
// if (response.isSuccessful) {
// foundPair = Pair(name, url)
// response.close()
// break
// }
// response.close()
// }
if (foundPair == null) {
throw Exception("Impossible de trouver l'URL du chapitre") throw Exception("Impossible de trouver l'URL du chapitre")
} }
val chapter = SChapter.create() val chapter = SChapter.create()
chapter.setUrlWithoutDomain(urlElement.attr("href")) chapter.setUrlWithoutDomain(foundPair.second)
chapter.name = urlElement.ownText() chapter.name = foundPair.first
// Using ownText() doesn't include child's text, like "VUS" or "RAW" badges, in the chapter name.
chapter.date_upload = element.selectFirst("span")?.text()?.trim()?.let { parseChapterDate(it) } ?: 0L chapter.date_upload = element.selectFirst("span")?.text()?.trim()?.let { parseChapterDate(it) } ?: 0L
return chapter return chapter
} }
@ -299,7 +334,7 @@ class Japscan : ConfigurableSource, ParsedHttpSource() {
handler.post { webView?.destroy() } handler.post { webView?.destroy() }
if (latch.count == 1L) { if (latch.count == 1L) {
throw Exception("Timed out decrypting image links") throw Exception("Erreur lors de la récupération des pages")
} }
val baseUrlHost = internalBaseUrl.toHttpUrl().host.substringAfter("www.") val baseUrlHost = internalBaseUrl.toHttpUrl().host.substringAfter("www.")