Toomics: Fix content loading (#7004)

* Fix selectors

* Add isNsfw

* Remove interceptors and use HttpUrl

* Fix lint

* Move code from fetchSearchManga to searchMangaParse
This commit is contained in:
Chopper 2025-01-10 20:45:55 -03:00 committed by Draff
parent 580b2b1b16
commit 038e9dcdcb
No known key found for this signature in database
GPG Key ID: E8A89F3211677653
2 changed files with 97 additions and 46 deletions

View File

@ -1,7 +1,8 @@
ext { ext {
extName = 'Toomics' extName = 'Toomics'
extClass = '.ToomicsFactory' extClass = '.ToomicsFactory'
extVersionCode = 7 extVersionCode = 8
isNsfw = true
} }
apply from: "$rootDir/common.gradle" apply from: "$rootDir/common.gradle"

View File

@ -3,17 +3,26 @@ package eu.kanade.tachiyomi.extension.all.toomics
import eu.kanade.tachiyomi.network.GET import eu.kanade.tachiyomi.network.GET
import eu.kanade.tachiyomi.network.POST import eu.kanade.tachiyomi.network.POST
import eu.kanade.tachiyomi.source.model.FilterList import eu.kanade.tachiyomi.source.model.FilterList
import eu.kanade.tachiyomi.source.model.MangasPage
import eu.kanade.tachiyomi.source.model.Page import eu.kanade.tachiyomi.source.model.Page
import eu.kanade.tachiyomi.source.model.SChapter import eu.kanade.tachiyomi.source.model.SChapter
import eu.kanade.tachiyomi.source.model.SManga import eu.kanade.tachiyomi.source.model.SManga
import eu.kanade.tachiyomi.source.online.ParsedHttpSource import eu.kanade.tachiyomi.source.online.ParsedHttpSource
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.decodeFromStream
import okhttp3.FormBody
import okhttp3.Headers import okhttp3.Headers
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
import okhttp3.Request import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody import okhttp3.Response
import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import rx.Observable import rx.Observable
import uy.kohesive.injekt.injectLazy
import java.net.URLDecoder import java.net.URLDecoder
import java.text.ParseException import java.text.ParseException
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
@ -32,6 +41,8 @@ abstract class ToomicsGlobal(
override val supportsLatest = true override val supportsLatest = true
private val json: Json by injectLazy()
override val client: OkHttpClient = super.client.newBuilder() override val client: OkHttpClient = super.client.newBuilder()
.connectTimeout(1, TimeUnit.MINUTES) .connectTimeout(1, TimeUnit.MINUTES)
.readTimeout(1, TimeUnit.MINUTES) .readTimeout(1, TimeUnit.MINUTES)
@ -42,25 +53,30 @@ abstract class ToomicsGlobal(
.add("Referer", "$baseUrl/$siteLang") .add("Referer", "$baseUrl/$siteLang")
.add("User-Agent", USER_AGENT) .add("User-Agent", USER_AGENT)
override fun popularMangaRequest(page: Int): Request { // ================================== Popular =======================================
return GET("$baseUrl/$siteLang/webtoon/favorite", headers)
}
// ToomicsGlobal does not have a popular list, so use recommended instead. override fun popularMangaRequest(page: Int) = GET("$baseUrl/$siteLang/webtoon/ranking", headers)
override fun popularMangaSelector(): String = "li > div.visual"
override fun popularMangaSelector(): String = "li > div.visual a:has(img)"
override fun popularMangaFromElement(element: Element): SManga = SManga.create().apply { override fun popularMangaFromElement(element: Element): SManga = SManga.create().apply {
title = element.select("h4[class$=title]").first()!!.ownText() title = element.select("h4[class$=title]").first()!!.ownText()
// sometimes href contains "/ab/on" at the end and redirects to a chapter instead of manga
setUrlWithoutDomain(element.select("a").attr("href").removeSuffix("/ab/on")) thumbnail_url = element.selectFirst("img")?.let { img ->
thumbnail_url = element.select("img").attr("src") when {
img.hasAttr("data-original") -> img.attr("data-original")
else -> img.attr("src")
}
}
// The path segment '/search/Y' bypasses the age check and prevents redirection to the chapter
setUrlWithoutDomain("${element.absUrl("href")}/search/Y")
} }
override fun popularMangaNextPageSelector(): String? = null override fun popularMangaNextPageSelector(): String? = null
override fun latestUpdatesRequest(page: Int): Request { // ================================== Latest =======================================
return GET("$baseUrl/$siteLang/webtoon/new_comics", headers)
} override fun latestUpdatesRequest(page: Int) = GET("$baseUrl/$siteLang/webtoon/new_comics", headers)
override fun latestUpdatesSelector(): String = popularMangaSelector() override fun latestUpdatesSelector(): String = popularMangaSelector()
@ -68,51 +84,60 @@ abstract class ToomicsGlobal(
override fun latestUpdatesNextPageSelector(): String? = null override fun latestUpdatesNextPageSelector(): String? = null
// ================================== Search =======================================
override fun searchMangaRequest(page: Int, query: String, filters: FilterList): Request { override fun searchMangaRequest(page: Int, query: String, filters: FilterList): Request {
val newHeaders = headersBuilder() val formBody = FormBody.Builder()
.add("Content-Type", "application/x-www-form-urlencoded") .add("toonData", query)
.build() .build()
return POST("$baseUrl/$siteLang/webtoon/ajax_search", headers, formBody)
val rbody = "toonData=$query&offset=0&limit=20".toRequestBody(null)
return POST("$baseUrl/$siteLang/webtoon/ajax_search", newHeaders, rbody)
} }
override fun searchMangaSelector(): String = "div.recently_list ul li" override fun searchMangaSelector(): String = "#search-list-items li"
override fun searchMangaFromElement(element: Element): SManga = SManga.create().apply { override fun searchMangaFromElement(element: Element): SManga = SManga.create().apply {
title = element.select("a div.search_box dl dt span.title").text() title = element.selectFirst("strong")!!.text()
thumbnail_url = element.select("div.search_box p.img img").attr("abs:src") thumbnail_url = element.selectFirst("img")?.absUrl("src")
// When the family mode is off, the url is encoded and is available in the onclick. element.selectFirst("a.relative")!!.attr("href").let {
element.select("a:not([href^=javascript])").let { val href = it.substringAfter("Base.setFamilyMode('N', '").substringBefore("'")
if (it != null) { val url = when {
setUrlWithoutDomain(it.attr("href")) href.contains(baseUrl, true) -> href.toHttpUrl()
} else { else -> "$baseUrl${URLDecoder.decode(href, "UTF-8")}".toHttpUrl()
val toonId = element.select("a").attr("onclick")
.substringAfter("Base.setDisplay('A', '")
.substringBefore("'")
.let { url -> URLDecoder.decode(url, "UTF-8") }
.substringAfter("?toon=")
.substringBefore("&")
url = "/$siteLang/webtoon/episode/toon/$toonId"
} }
// The path segment '/search/Y' bypasses the age check and prevents redirection to the chapter
setUrlWithoutDomain("$baseUrl/$siteLang/webtoon/episode/toon/${url.queryParameter("toon")}/search/Y")
} }
} }
override fun searchMangaNextPageSelector(): String? = null override fun searchMangaNextPageSelector(): String? = null
override fun mangaDetailsParse(document: Document): SManga = SManga.create().apply { override fun searchMangaParse(response: Response): MangasPage {
val header = document.select("#glo_contents header.ep-cover_ch div.title_content") val searchDto = json.decodeFromStream<SearchDto>(response.body.byteStream())
val document = Jsoup.parseBodyFragment(searchDto.content.clearHtml(), baseUrl)
title = header.select("h1").text() val mangas = document.select(searchMangaSelector()).map(::searchMangaFromElement)
author = header.select("p.type_box span.writer").text() return MangasPage(mangas, false)
artist = header.select("p.type_box span.writer").text()
genre = header.select("p.type_box span.type").text().replace("/", ",")
description = header.select("h2").text()
thumbnail_url = document.select("head meta[property='og:image']").attr("content")
} }
// ================================== Manga Details ================================
/**
 * Builds the [SManga] details from a series page.
 *
 * The header section (the `section.relative` under `#glo_contents` that contains a thumbnail image)
 * and its `h2` title are required; a missing element fails fast through `!!`.
 * Artist/author, genre and description are optional and stay unset when their element is absent.
 *
 * @param document the parsed series page.
 * @return the populated manga details.
 */
override fun mangaDetailsParse(document: Document): SManga = SManga.create().apply {
    val header = document.selectFirst("#glo_contents section.relative:has(img[src*=thumb])")!!

    title = header.selectFirst("h2")!!.text()

    header.selectFirst(".mb-0.text-xs.font-normal")?.let { credits ->
        // Splitting on the bare "|" keeps any padding around the separator, so trim every
        // segment to avoid storing names with leading/trailing spaces.
        val info = credits.text().split("|").map { part -> part.trim() }
        // First segment is used as the artist, last as the author; with no "|" both are the same text.
        artist = info.first()
        author = info.last()
    }

    genre = header.selectFirst("dt:contains(genres) + dd")?.text()?.replace("/", ",")
    // NOTE(review): "break-noraml" is kept exactly as written; presumably it mirrors the class
    // name used by the site's markup — confirm before "correcting" the spelling.
    description = header.selectFirst(".break-noraml.text-xs")?.text()
    thumbnail_url = document.selectFirst("head meta[property='og:image']")?.attr("content")
}
// ================================== Chapters =====================================
override fun fetchChapterList(manga: SManga): Observable<List<SChapter>> { override fun fetchChapterList(manga: SManga): Observable<List<SChapter>> {
return super.fetchChapterList(manga) return super.fetchChapterList(manga)
.map { it.reversed() } .map { it.reversed() }
@ -122,18 +147,20 @@ abstract class ToomicsGlobal(
override fun chapterListSelector(): String = "li.normal_ep:has(.coin-type1, .coin-type6)" override fun chapterListSelector(): String = "li.normal_ep:has(.coin-type1, .coin-type6)"
override fun chapterFromElement(element: Element): SChapter = SChapter.create().apply { override fun chapterFromElement(element: Element): SChapter = SChapter.create().apply {
val num = element.select("div.cell-num").text() val num = element.selectFirst("div.cell-num")!!.text()
val numText = if (num.isNotEmpty()) "$num - " else "" val numText = if (num.isNotEmpty()) "$num - " else ""
name = numText + element.select("div.cell-title strong").first()?.ownText() name = numText + (element.selectFirst("div.cell-title strong")?.ownText() ?: "")
chapter_number = num.toFloatOrNull() ?: -1f chapter_number = num.toFloatOrNull() ?: -1f
date_upload = parseChapterDate(element.select("div.cell-time time").text()) date_upload = parseChapterDate(element.select("div.cell-time time").text())
scanlator = "Toomics" scanlator = "Toomics"
url = element.select("a").attr("onclick") url = element.selectFirst("a")!!.attr("onclick")
.substringAfter("href='") .substringAfter("href='")
.substringBefore("'") .substringBefore("'")
} }
// ================================== Pages ========================================
override fun pageListParse(document: Document): List<Page> { override fun pageListParse(document: Document): List<Page> {
if (document.select("div.section_age_verif").isNotEmpty()) { if (document.select("div.section_age_verif").isNotEmpty()) {
throw Exception("Verify age via WebView") throw Exception("Verify age via WebView")
@ -155,6 +182,15 @@ abstract class ToomicsGlobal(
return GET(page.imageUrl!!, newHeaders) return GET(page.imageUrl!!, newHeaders)
} }
// ================================== Utilities ====================================
/**
 * Serializable wrapper for a JSON object of the form `{"webtoon": {"sHtml": "..."}}`.
 *
 * Only the inner `sHtml` string is exposed, through [content].
 */
@Serializable
class SearchDto(
    @SerialName("webtoon") private val html: Html
) {
    /** Inner object carrying the `sHtml` string. */
    @Serializable
    class Html(
        @SerialName("sHtml") val data: String
    )

    /** The `sHtml` string of the wrapped [Html] object. */
    val content: String
        get() {
            return html.data
        }
}
private fun parseChapterDate(date: String): Long { private fun parseChapterDate(date: String): Long {
return try { return try {
dateFormat.parse(date)?.time ?: 0L dateFormat.parse(date)?.time ?: 0L
@ -163,7 +199,21 @@ abstract class ToomicsGlobal(
} }
} }
/**
 * Returns this string after running it through [unicode] and then deleting every match of
 * [ESCAPE_CHAR_REGEX] from the result.
 */
fun String.clearHtml(): String = ESCAPE_CHAR_REGEX.replace(unicode(), "")
/**
 * Replaces every match of [UNICODE_REGEX] (a literal `\uXXXX` or `\UXXXXXXXX` escape) with the
 * character(s) for the hexadecimal code point it names.
 *
 * Code points above U+FFFF are emitted as a surrogate pair instead of being truncated to a single
 * 16-bit char. An escape whose value does not fit in an `Int` or is not a valid Unicode code point
 * is left in the text unchanged rather than throwing.
 *
 * @return the string with all decodable escapes replaced.
 */
fun String.unicode(): String {
    return UNICODE_REGEX.replace(this) { match ->
        // Exactly one of the two capture groups participates; the other one is reported as "".
        val hex = match.groupValues[1].ifEmpty { match.groupValues[2] }
        // toIntOrNull: eight hex digits can exceed Int.MAX_VALUE (e.g. FFFFFFFF).
        val codePoint = hex.toIntOrNull(16)
        if (codePoint != null && Character.isValidCodePoint(codePoint)) {
            // toChars yields one char for BMP values and a surrogate pair for supplementary ones.
            String(Character.toChars(codePoint))
        } else {
            match.value
        }
    }
}
companion object { companion object {
private const val USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36" private const val USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
val UNICODE_REGEX = "\\\\u([0-9A-Fa-f]{4})|\\\\U([0-9A-Fa-f]{8})".toRegex()
val ESCAPE_CHAR_REGEX = """(\\n)|(\\r)|(\\{1})""".toRegex()
} }
} }