Update Japscan extension to improve chapter URL extraction after the new obfuscation techniques (#10807)
* Update Japscan extension to improve chapter URL extraction and error handling
* Fix lint
This commit is contained in:
parent
6a29aa1afd
commit
2928fc45a6
@ -1,7 +1,7 @@
|
|||||||
ext {
|
ext {
|
||||||
extName = 'Japscan'
|
extName = 'Japscan'
|
||||||
extClass = '.Japscan'
|
extClass = '.Japscan'
|
||||||
extVersionCode = 52
|
extVersionCode = 53
|
||||||
}
|
}
|
||||||
|
|
||||||
apply from: "$rootDir/common.gradle"
|
apply from: "$rootDir/common.gradle"
|
||||||
|
|||||||
@ -227,15 +227,50 @@ class Japscan : ConfigurableSource, ParsedHttpSource() {
|
|||||||
// Those have a span.badge "SPOILER" or "RAW". The additional pseudo selector makes sure to exclude these from the chapter list.
|
// Those have a span.badge "SPOILER" or "RAW". The additional pseudo selector makes sure to exclude these from the chapter list.
|
||||||
|
|
||||||
override fun chapterFromElement(element: Element): SChapter {
|
override fun chapterFromElement(element: Element): SChapter {
|
||||||
val urlElement = element.selectFirst("*[href~=manga|manhua|manhwa]")
|
// Only search for a tag with any attribute containing manga/manhua/manhwa
|
||||||
if (urlElement == null) {
|
val urlPairs = element.getElementsByTag("a")
|
||||||
|
.mapNotNull { el ->
|
||||||
|
// Find the first attribute whose value matches the chapter URL pattern
|
||||||
|
val attrMatch = el.attributes().asList().firstOrNull { attr ->
|
||||||
|
val value = attr.value
|
||||||
|
value.startsWith("/manga/") || value.startsWith("/manhua/") || value.startsWith("/manhwa/")
|
||||||
|
}
|
||||||
|
if (attrMatch != null) {
|
||||||
|
val name = el.ownText().ifBlank { el.text() }
|
||||||
|
// Mark if the attribute is not "href"
|
||||||
|
val isNonHref = attrMatch.key != "href"
|
||||||
|
Triple(name, attrMatch.value, isNonHref)
|
||||||
|
} else {
|
||||||
|
null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.distinctBy { it.second }
|
||||||
|
.sortedWith(
|
||||||
|
compareByDescending<Triple<String, String, Boolean>> { it.third }
|
||||||
|
.thenBy { it.second.length },
|
||||||
|
) // Prefer non-href first, then shorter URLs
|
||||||
|
.map { Pair(it.first, it.second) }
|
||||||
|
|
||||||
|
var foundPair: Pair<String, String>? = urlPairs.firstOrNull()
|
||||||
|
// var log = urlPairs.size.toString() + " URLs found:\n"
|
||||||
|
// for ((name, url) in urlPairs) {
|
||||||
|
// val testUrl = internalBaseUrl + url
|
||||||
|
// val response = client.newCall(GET(testUrl, headers)).execute()
|
||||||
|
// log += "$name: $testUrl => ${response}\n"
|
||||||
|
// if (response.isSuccessful) {
|
||||||
|
// foundPair = Pair(name, url)
|
||||||
|
// response.close()
|
||||||
|
// break
|
||||||
|
// }
|
||||||
|
// response.close()
|
||||||
|
// }
|
||||||
|
if (foundPair == null) {
|
||||||
throw Exception("Impossible de trouver l'URL du chapitre")
|
throw Exception("Impossible de trouver l'URL du chapitre")
|
||||||
}
|
}
|
||||||
|
|
||||||
val chapter = SChapter.create()
|
val chapter = SChapter.create()
|
||||||
chapter.setUrlWithoutDomain(urlElement.attr("href"))
|
chapter.setUrlWithoutDomain(foundPair.second)
|
||||||
chapter.name = urlElement.ownText()
|
chapter.name = foundPair.first
|
||||||
// Using ownText() doesn't include child's text, like "VUS" or "RAW" badges, in the chapter name.
|
|
||||||
chapter.date_upload = element.selectFirst("span")?.text()?.trim()?.let { parseChapterDate(it) } ?: 0L
|
chapter.date_upload = element.selectFirst("span")?.text()?.trim()?.let { parseChapterDate(it) } ?: 0L
|
||||||
return chapter
|
return chapter
|
||||||
}
|
}
|
||||||
@ -299,7 +334,7 @@ class Japscan : ConfigurableSource, ParsedHttpSource() {
|
|||||||
handler.post { webView?.destroy() }
|
handler.post { webView?.destroy() }
|
||||||
|
|
||||||
if (latch.count == 1L) {
|
if (latch.count == 1L) {
|
||||||
throw Exception("Timed out decrypting image links")
|
throw Exception("Erreur lors de la récupération des pages")
|
||||||
}
|
}
|
||||||
|
|
||||||
val baseUrlHost = internalBaseUrl.toHttpUrl().host.substringAfter("www.")
|
val baseUrlHost = internalBaseUrl.toHttpUrl().host.substringAfter("www.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user