Fix incorrect ResponseBody parsing (#12893)

* Fix incorrect ResponseBody parsing

* enable rate limit
This commit is contained in:
Vetle Ledaal 2022-08-07 17:37:21 +00:00 committed by GitHub
parent 5d8494e775
commit e0e3afa793
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 46 deletions

View File

@ -5,7 +5,7 @@ ext {
extName = 'Koushoku'
pkgNameSuffix = 'en.koushoku'
extClass = '.Koushoku'
extVersionCode = 12
extVersionCode = 13
isNsfw = true
}

View File

@ -13,6 +13,7 @@ import eu.kanade.tachiyomi.source.online.ParsedHttpSource
import eu.kanade.tachiyomi.util.asJsoup
import okhttp3.Headers
import okhttp3.HttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import okhttp3.OkHttpClient
import okhttp3.Request
@ -20,15 +21,15 @@ import okhttp3.Response
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import rx.Observable
import java.net.URL
class Koushoku : ParsedHttpSource() {
// NOTE(review): this view is a rendered diff without +/- markers, so `thumbnailSelector` and
// `magazinesSelector` each appear twice. The first of each pair looks like the removed line and
// the second like its replacement — confirm against the commit before relying on either value.
companion object {
// Literal "id:" prefix; presumably marks a search query as an archive-id lookup — usage is not visible here.
const val PREFIX_ID_SEARCH = "id:"
// Captures the numeric id that follows "/archive/" in a URL (group 1).
val archiveRegex = "/archive/(\\d+)".toRegex()
// CSS selector for the cover image: an `img` inside an element with class `thumbnail`.
const val thumbnailSelector = ".thumbnail img"
// CSS selector for magazine links: anchors inside `.magazines` within `.metadata`.
const val magazinesSelector = ".metadata .magazines a"
// CSS selector for the cover image: an `img` inside a `figure` element.
const val thumbnailSelector = "figure img"
// CSS selector for magazine links: anchors inside `.metadata` whose href starts with "/magazines/".
const val magazinesSelector = ".metadata a[href^='/magazines/']"
// Splits a URL into three groups: (1) a prefix ending in "/", (2) a run of digits, (3) the remainder.
// The prefix group is greedy, so the digits matched are the ones after the last "/" that is followed by a digit.
private val PATTERN_IMAGES = "(.+/)(\\d+)(.*)".toRegex()
}
override val baseUrl = "https://koushoku.org"
@ -42,17 +43,17 @@ class Koushoku : ParsedHttpSource() {
.build()
/**
 * Builds the default request headers on top of the superclass builder: sets a desktop Chrome
 * `User-Agent` and adds a `Referer` of "$baseUrl/".
 *
 * NOTE(review): this view is a rendered diff without +/- markers, so the `User-Agent` call appears
 * twice (Chrome/103 and Chrome/106). The first looks like the removed line and the second like its
 * replacement — confirm against the commit.
 */
override fun headersBuilder(): Headers.Builder = super.headersBuilder()
.set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
.set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36")
.add("Referer", "$baseUrl/")
/** Builds the GET request for one page of the latest-updates listing; [page] is sent as the `page` query parameter. */
override fun latestUpdatesRequest(page: Int) = GET("$baseUrl/?page=$page", headers)
/** CSS selector for the latest-updates entries: `.entry` children of `.entries` inside `#archives.feed`. */
override fun latestUpdatesSelector() = "#archives.feed .entries > .entry"
// NOTE(review): this view is a rendered diff without +/- markers, so the function appears twice.
// The first definition looks like the removed version and the second like its replacement —
// confirm against the commit.
/** CSS selector for the next-page control: the `.next` element inside the feed's `.pagination`. */
override fun latestUpdatesNextPageSelector() = "#archives.feed .pagination .next"
/**
 * CSS selector for the next-page link: the `a` directly inside the `li` that immediately follows
 * the `li` holding the active page link, provided that following `li` is not the last child.
 */
override fun latestUpdatesNextPageSelector() = "footer nav li:has(a.active) + li:not(:last-child) > a"
/**
 * Converts one latest-updates entry [element] into an [SManga] carrying its URL, title and
 * thumbnail URL.
 *
 * NOTE(review): this view is a rendered diff without +/- markers, so every field is set twice.
 * The first group of three lines looks like the removed code and the second group like its
 * replacement — confirm against the commit.
 */
override fun latestUpdatesFromElement(element: Element) = SManga.create().apply {
// First group: URL from the `a` href, title from the text of `.title`, thumbnail from the raw `src` attribute.
setUrlWithoutDomain(element.select("a").attr("href"))
title = element.select(".title").text()
thumbnail_url = element.select(thumbnailSelector).attr("src")
// Second group: uses only the first match of each selector; the title comes from the `title`
// attribute of the first element that has one, and the thumbnail is read through absUrl("src").
setUrlWithoutDomain(element.selectFirst("a").attr("href"))
title = element.selectFirst("[title]").attr("title")
thumbnail_url = element.selectFirst(thumbnailSelector).absUrl("src")
}
/** Builds the GET request for the archive page at "$baseUrl/archive/[id]" using the default headers. */
private fun searchMangaByIdRequest(id: String) = GET("$baseUrl/archive/$id", headers)
@ -131,13 +132,13 @@ class Koushoku : ParsedHttpSource() {
}
override fun mangaDetailsParse(document: Document) = SManga.create().apply {
title = document.selectFirst(".metadata .title").text()
title = document.selectFirst(".metadata > h1").text()
// Reuse cover from browse
thumbnail_url = document.selectFirst(thumbnailSelector).attr("src")
thumbnail_url = document.selectFirst(thumbnailSelector).absUrl("src")
.replace(Regex("/\\d+\\.webp\$"), "/288.webp")
artist = document.select(".metadata .artists a, .metadata .circles a")
artist = document.select(".metadata a[href^='/artists/'], .metadata a[href^='/circles/']")
.joinToString { it.text() }
author = artist
genre = document.select(".metadata .tags a, $magazinesSelector")
@ -153,7 +154,7 @@ class Koushoku : ParsedHttpSource() {
SChapter.create().apply {
setUrlWithoutDomain(response.request.url.encodedPath)
name = "Chapter"
date_upload = document.select(".metadata .published td:nth-child(2)")
date_upload = document.select("tr > td:first-child:contains(Uploaded Date) + td")
.attr("data-unix").toLong() * 1000
}
)
@ -171,22 +172,26 @@ class Koushoku : ParsedHttpSource() {
if (totalPages == 0)
throw UnsupportedOperationException("Error: Empty pages (try Webview)")
val id = archiveRegex.find(document.location())?.groups?.get(1)?.value
if (id.isNullOrEmpty())
throw UnsupportedOperationException("Error: Unknown archive id")
val url = URL(document.selectFirst(".main img, main img").attr("src"))
val origin = "${url.protocol}://${url.host}"
val url = document.selectFirst(".main img, main img").absUrl("src")
val match = PATTERN_IMAGES.find(url)!!
val prefix = match.groupValues[1]
val suffix = match.groupValues[3]
return (1..totalPages).map {
Page(it, "", "$origin/data/$id/$it.jpg")
Page(it, "", "$prefix$it$suffix")
}
}
/**
 * Builds the GET request used to download the image of [page].
 *
 * `page.imageUrl` is dereferenced with `!!`, so a page without an image URL fails here.
 *
 * NOTE(review): this view is a rendered diff without +/- markers, so `newHeaders` is declared
 * twice. The first declaration looks like the removed code and the second like its replacement —
 * confirm against the commit.
 */
override fun imageRequest(page: Page): Request {
// First declaration: always adds an `Origin` header set to baseUrl.
val newHeaders = headersBuilder()
.add("Origin", baseUrl)
.build()
// Second declaration: adds `Origin` only when the image host differs from baseUrl's host;
// for same-host images the default `headers` are reused unchanged.
val url = page.imageUrl!!.toHttpUrl()
val newHeaders = if (baseUrl.toHttpUrl().host != url.host) {
headersBuilder()
.add("Origin", baseUrl)
.build()
} else {
headers
}
return GET(page.imageUrl!!, newHeaders)
}
@ -257,17 +262,17 @@ class Koushoku : ParsedHttpSource() {
append("\n")
}
val parodies = document.select(".metadata .parodies a")
val parodies = document.select(".metadata a[href^='/parodies/']")
if (parodies.isNotEmpty()) {
append("Parodies: ")
append(parodies.joinToString { it.text() })
append("\n")
}
val pages = document.selectFirst(".metadata .pages td:nth-child(2)")
val pages = document.selectFirst("tr > td:first-child:contains(Pages) + td")
append("Pages: ").append(pages.text()).append("\n")
val size = document.selectFirst(".metadata .size td:nth-child(2)")
val size = document.selectFirst("tr > td:first-child:contains(Size) + td")
append("Size: ").append(size.text())
}
}

View File

@ -1,6 +1,5 @@
package eu.kanade.tachiyomi.extension.en.koushoku
import android.annotation.SuppressLint
import android.app.Application
import android.os.Handler
import android.os.Looper
@ -21,46 +20,48 @@ class KoushokuWebViewInterceptor : Interceptor {
val response = chain.proceed(request)
if (response.headers("Content-Type").any { it.contains("text/html") }) {
val responseBody = response.peekBody(1 * 1024 * 1024).toString()
val document = Jsoup.parse(responseBody)
if (document.selectFirst("h1")?.text()?.contains(Regex("banned$")) == true) {
throw IOException("You have been banned. Check WebView for details.")
val responseBody = response.peekBody(1 * 1024 * 1024).string()
if (response.code == 403) {
val document = Jsoup.parse(responseBody)
if (document.selectFirst("h1")?.text()?.contains(Regex("banned$")) == true) {
throw IOException("You have been banned. Check WebView for details.")
}
}
try {
proceedWithWebView(response)
} catch (e: Exception) {
throw IOException(e)
if (response.networkResponse != null) {
try {
proceedWithWebView(response, responseBody)
} catch (e: Exception) {
throw IOException(e)
}
}
}
return response
}
@SuppressLint("SetJavaScriptEnabled")
private fun proceedWithWebView(response: Response) {
private fun proceedWithWebView(response: Response, responseBody: String) {
val latch = CountDownLatch(1)
val handler = Handler(Looper.getMainLooper())
handler.post {
val webview = WebView(Injekt.get<Application>())
with(webview.settings) {
javaScriptEnabled = true
val webView = WebView(Injekt.get<Application>())
with(webView.settings) {
loadsImagesAutomatically = false
userAgentString = response.request.header("User-Agent")
}
webview.webViewClient = object : WebViewClient() {
webView.webViewClient = object : WebViewClient() {
override fun onPageFinished(view: WebView, url: String) {
webView.stopLoading()
webView.destroy()
latch.countDown()
}
}
webview.loadDataWithBaseURL(
webView.loadDataWithBaseURL(
response.request.url.toString(),
response.peekBody(1 * 1024 * 1024).toString(),
responseBody,
"text/html",
"utf-8",
null