Fix Twi4 due to site layout change (#19180)

* Fix new URL scheme. Close #14735

* Updated to latest site layout
This commit is contained in:
ringosham 2023-12-07 13:17:08 +00:00 committed by GitHub
parent 4e56e7bc69
commit b3f36f77d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 48 deletions

View File

@ -6,7 +6,7 @@ ext {
extName = 'Twi4' extName = 'Twi4'
pkgNameSuffix = 'ja.twi4' pkgNameSuffix = 'ja.twi4'
extClass = '.Twi4' extClass = '.Twi4'
extVersionCode = 4 extVersionCode = 5
} }
apply from: "$rootDir/common.gradle" apply from: "$rootDir/common.gradle"

View File

@ -27,7 +27,8 @@ class Twi4 : HttpSource() {
override val name: String = "Twi4" override val name: String = "Twi4"
override val supportsLatest: Boolean = false override val supportsLatest: Boolean = false
private val application: Application by injectLazy() private val application: Application by injectLazy()
private val validPageTest: Regex = Regex("/comics/twi4/\\w+/works/\\d{4}\\.[0-9a-f]{32}\\.jpg") private val validPageTest: Regex =
Regex("/comics/twi4/\\w+/works/\\d{4}\\.[0-9a-zA-Z]{32}\\.jpg")
companion object Constants { companion object Constants {
const val SEARCH_PREFIX_SLUG = "SLUG:" const val SEARCH_PREFIX_SLUG = "SLUG:"
@ -40,47 +41,48 @@ class Twi4 : HttpSource() {
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36",
).build() ).build()
// Popular manga == All manga in the site // Both latest and popular only list 4 manga in total
override fun fetchPopularManga(page: Int): Observable<MangasPage> { // As the full catalog consists of fewer than 50 manga, it is not worth implementing
return client.newCall(popularMangaRequest(page)) // We'll just list all manga in the catalog instead
.asObservableSuccess() override fun popularMangaParse(response: Response): MangasPage {
.map { response ->
parsePopularMangaRequest(response, page < 2)
}
}
private fun parsePopularMangaRequest(response: Response, hasNextPage: Boolean): MangasPage {
val doc = Jsoup.parse(response.body.string()) val doc = Jsoup.parse(response.body.string())
val ret = mutableListOf<SManga>() val ret = mutableListOf<SManga>()
// One of the manga is a link to Twi4's zadankai, which is a platform for anyone to post oneshot 4-koma with judges to comment // Manga that are recently updated don't show up on the full catalog
// It has a completely different page layout and it is pretty much its own "manga site". // So we'll need to parse the recent updates section as well
// Therefore, for simplicity's sake, this extension (or at least this source) will not include that as a "Manga" val listings = arrayOf(
val mangas = doc.select("section:not(.zadankai):not([id])") "#lineup_recent > div> section",
for (manga in mangas) { "#lineup > div > section:not(.zadankai):not([id])",
ret.add( )
SManga.create().apply { for (listing in listings) {
thumbnail_url = val mangas = doc.select(listing)
getUrlDomain() + manga.select("header > div.figgroup > figure > a > img") for (manga in mangas) {
.attr("src") ret.add(
setUrlWithoutDomain( SManga.create().apply {
getUrlDomain() + manga.select("header > div.hgroup > h3 > a").attr("href"), thumbnail_url =
) getUrlDomain() + manga.select("div.figgroup > figure > a > img")
title = manga.select("header > div.hgroup > h3 > a > strong").text() .attr("src")
}, setUrlWithoutDomain(
) getUrlDomain() + manga.select("div.hgroup > h3 > a").attr("href"),
)
title = manga.select("div.hgroup > h3 > a").text()
author = manga.select("div.hgroup > p").text()
status =
if (manga.select("ul:first-child > li:last-child > em.is-completed")
.isEmpty()
) {
SManga.ONGOING
} else {
SManga.COMPLETED
}
},
)
}
} }
return MangasPage(ret, hasNextPage) return MangasPage(ret, false)
} }
// We have to fetch all manga from two different pages
// One from the homepage (which contains all ongoing manga), one from the completed manga page
// The menu at the top relies on JS which JSoup doesn't load
override fun popularMangaRequest(page: Int): Request { override fun popularMangaRequest(page: Int): Request {
return if (page == 1) { return GET(baseUrl, getChromeHeaders())
GET(baseUrl, getChromeHeaders())
} else {
GET(baseUrl + "completed.html", getChromeHeaders())
}
} }
override fun mangaDetailsRequest(manga: SManga): Request = override fun mangaDetailsRequest(manga: SManga): Request =
@ -125,7 +127,7 @@ class Twi4 : HttpSource() {
} }
} }
} }
status = SManga.UNKNOWN // While the status can be obtained at the home page, there is no such info at the details page
} }
} }
@ -203,16 +205,14 @@ class Twi4 : HttpSource() {
.let { re.replace(it, "\"$1\":") } .let { re.replace(it, "\"$1\":") }
indexResponse.close() indexResponse.close()
val indexElement = index.let { Json.parseToJsonElement(it) } val indexElement = index.let { Json.parseToJsonElement(it) }
var suffix: String? = null val suffix =
if (indexElement != null) { indexElement.jsonObject["Items"]?.jsonArray?.get(chapterNum - 1)?.jsonObject?.get("Suffix")?.jsonPrimitive?.content
// Each entry in the Items array corresponds to 1 chapter/page
suffix = indexElement.jsonObject["Items"]?.jsonArray?.get(chapterNum - 1)?.jsonObject?.get("Suffix")?.jsonPrimitive?.content
}
// Twi4's image links are a bit of a mess // Twi4's image links are a bit of a mess
// Because in very rare cases, the image filename *doesn't* come with a suffix // Because in very rare cases, the image filename *doesn't* come with a suffix
// So only attach the suffix if there is one // So only attach the suffix if there is one
if (suffix != null) { if (suffix != null) {
imageUrl = getUrlDomain() + page.select("div > div > p > img").attr("src").dropLast(4) + suffix + ".jpg" imageUrl = getUrlDomain() + page.select("div > div > p > img").attr("src")
.dropLast(4) + suffix + ".jpg"
} }
} }
ret.add( ret.add(
@ -239,7 +239,7 @@ class Twi4 : HttpSource() {
// There will still be some urls that would accidentally activate the intent (like the news page), // There will still be some urls that would accidentally activate the intent (like the news page),
// but there's no way to avoid it. // but there's no way to avoid it.
if (slug.endsWith("html") || slug.startsWith("zadankai")) { if (slug.endsWith("html") || slug.startsWith("zadankai") || slug.startsWith("others")) {
return Observable.just(MangasPage(listOf(), false)) return Observable.just(MangasPage(listOf(), false))
} }
return client.newCall(GET(baseUrl + slug)) return client.newCall(GET(baseUrl + slug))
@ -268,9 +268,6 @@ class Twi4 : HttpSource() {
override fun latestUpdatesRequest(page: Int): Request = override fun latestUpdatesRequest(page: Int): Request =
throw UnsupportedOperationException("Not used") throw UnsupportedOperationException("Not used")
override fun popularMangaParse(response: Response): MangasPage =
throw UnsupportedOperationException("Not used")
override fun imageUrlParse(response: Response): String = override fun imageUrlParse(response: Response): String =
throw UnsupportedOperationException("Not used") throw UnsupportedOperationException("Not used")