Rewritten Nicovideo Seiga for new domain and API (#3841)

* Rewritten Nicomanga for new domain * Fix typos to use API URL instead * Fixed missing WebView and many parsing issues * Preserve newlines when displaying description * Bump version ID * Wrapped all requests to DTOs * Minor refactor * Applying all requested changes * Fixed displaying of error message * Applying requested changes * Remove "data" * I forgor * Remove redundant code and add headers
2024-07-05 12:14:43 +01:00 · 2024-07-05 12:14:43 +01:00 · 5cebd2a63a
commit 5cebd2a63a
parent 1518e5b867
3 changed files with 241 additions and 167 deletions
--- a/src/ja/nicovideoseiga/build.gradle
+++ b/src/ja/nicovideoseiga/build.gradle
@ -1,7 +1,7 @@
 ext {
    extName = 'Nicovideo Seiga'
    extClass = '.NicovideoSeiga'
-    extVersionCode = 2
+    extVersionCode = 3
 }

 apply from: "$rootDir/common.gradle"
--- a/src/ja/nicovideoseiga/src/eu/kanade/tachiyomi/extension/ja/nicovideoseiga/ApiResponse.kt
+++ b/src/ja/nicovideoseiga/src/eu/kanade/tachiyomi/extension/ja/nicovideoseiga/ApiResponse.kt
@ -0,0 +1,109 @@
+package eu.kanade.tachiyomi.extension.ja.nicovideoseiga
+
+import kotlinx.serialization.KSerializer
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.builtins.ListSerializer
+import kotlinx.serialization.json.JsonArray
+import kotlinx.serialization.json.JsonElement
+import kotlinx.serialization.json.JsonObject
+import kotlinx.serialization.json.JsonTransformingSerializer
+
+@Serializable
+class ApiResponse<T>(
+    val data: Data<T>,
+)
+
+@Serializable
+class Data<T>(
+    @Serializable(with = SingleResultSerializer::class)
+    val result: List<T>,
+    val extra: Extra? = null,
+)
+
+@Serializable
+class Extra(
+    @SerialName("has_next")
+    val hasNext: Boolean? = null,
+)
+
+class SingleResultSerializer<T>(serializer: KSerializer<T>) : JsonTransformingSerializer<List<T>>(ListSerializer(serializer)) {
+    // Wrap single results in a list. Leave multiple results as is.
+    override fun transformSerialize(element: JsonElement): JsonElement {
+        return when (element) {
+            is JsonArray -> element
+            is JsonObject -> JsonArray(listOf(element))
+            else -> throw IllegalStateException("Unexpected JSON element type: $element")
+        }
+    }
+
+    override fun transformDeserialize(element: JsonElement): JsonElement {
+        return if (element is JsonArray) element else JsonArray(listOf(element))
+    }
+}
+
+// Result objects
+
+@Serializable
+class PopularManga(
+    val id: Int,
+    val title: String,
+    val author: String,
+    @SerialName("thumbnail_url")
+    val thumbnailUrl: String,
+)
+
+@Serializable
+class Manga(
+    val id: Int,
+    val meta: MangaMetadata,
+) {
+    @Serializable
+    class MangaMetadata(
+        val title: String,
+        @SerialName("display_author_name")
+        val author: String,
+        val description: String,
+        @SerialName("serial_status")
+        val serialStatus: String,
+        @SerialName("square_image_url")
+        val thumbnailUrl: String,
+        @SerialName("share_url")
+        val shareUrl: String,
+    )
+}
+
+// Frames are the internal name for pages in the API
+@Serializable
+class Frame(
+    val meta: FrameMetadata,
+) {
+    @Serializable
+    class FrameMetadata(
+        @SerialName("source_url")
+        val sourceUrl: String,
+    )
+}
+
+// Chapters are known as Episodes internally in the API
+@Serializable
+class Chapter(
+    val id: Int,
+    val meta: ChapterMetadata,
+    @SerialName("own_status")
+    val ownership: Ownership,
+) {
+    @Serializable
+    class ChapterMetadata(
+        val title: String,
+        val number: Int,
+        @SerialName("created_at")
+        val createdAt: Long,
+    )
+
+    @Serializable
+    class Ownership(
+        @SerialName("sell_status")
+        val sellStatus: String,
+    )
+}
--- a/src/ja/nicovideoseiga/src/eu/kanade/tachiyomi/extension/ja/nicovideoseiga/NicovideoSeiga.kt
+++ b/src/ja/nicovideoseiga/src/eu/kanade/tachiyomi/extension/ja/nicovideoseiga/NicovideoSeiga.kt
@ -1,216 +1,181 @@
 package eu.kanade.tachiyomi.extension.ja.nicovideoseiga

-import android.app.Application
 import eu.kanade.tachiyomi.network.GET
+import eu.kanade.tachiyomi.network.asObservable
 import eu.kanade.tachiyomi.source.model.FilterList
 import eu.kanade.tachiyomi.source.model.MangasPage
 import eu.kanade.tachiyomi.source.model.Page
 import eu.kanade.tachiyomi.source.model.SChapter
 import eu.kanade.tachiyomi.source.model.SManga
 import eu.kanade.tachiyomi.source.online.HttpSource
-import eu.kanade.tachiyomi.util.asJsoup
+import kotlinx.serialization.decodeFromString
+import kotlinx.serialization.json.Json
 import okhttp3.Interceptor
-import okhttp3.MediaType.Companion.toMediaTypeOrNull
+import okhttp3.MediaType.Companion.toMediaType
 import okhttp3.OkHttpClient
 import okhttp3.Request
 import okhttp3.Response
 import okhttp3.ResponseBody.Companion.toResponseBody
+import org.jsoup.Jsoup
+import rx.Observable
 import uy.kohesive.injekt.injectLazy
 import kotlin.experimental.xor

 class NicovideoSeiga : HttpSource() {
-    // Nicovideo Seiga contains illustrations, manga and books from Bookwalker. This extension will focus on manga only.
-    override val baseUrl: String = "https://seiga.nicovideo.jp"
+    override val baseUrl: String = "https://sp.manga.nicovideo.jp"
    override val lang: String = "ja"
    override val name: String = "Nicovideo Seiga"
-    override val supportsLatest: Boolean = true
+    override val supportsLatest: Boolean = false
    override val client: OkHttpClient = network.client.newBuilder()
        .addInterceptor(::imageIntercept)
        .build()
-    private val application: Application by injectLazy()
+    override val versionId: Int = 2
+    private val apiUrl: String = "https://api.nicomanga.jp/api/v1/app/manga"
+    private val json: Json by injectLazy()

-    override fun latestUpdatesParse(response: Response): MangasPage {
-        val currentPage = response.request.url.queryParameter("page")!!.toInt()
-        val doc = response.asJsoup()
-        val mangaCount = doc.select("#main_title > h2 > span").text().trim().dropLast(1).toInt()
-        val mangaPerPage = 20
-        val mangaList = doc.select("#comic_list > ul > li")
-        val mangas = ArrayList<SManga>()
-        for (manga in mangaList) {
-            val mangaElement = manga.select("div > .description > div > div")
-            mangas.add(
+    override fun latestUpdatesParse(response: Response): MangasPage =
+        throw UnsupportedOperationException()
+
+    override fun latestUpdatesRequest(page: Int): Request = throw UnsupportedOperationException()
+
+    override fun popularMangaParse(response: Response): MangasPage {
+        val pageNumber = response.request.url.queryParameter("page")!!.toInt()
+        val mangas = json.decodeFromString<List<PopularManga>>(response.body.string())
+        // The api call allows a maximum of 5 pages
+        return MangasPage(
+            mangas.map {
                SManga.create().apply {
-                    setUrlWithoutDomain(
-                        baseUrl + mangaElement.select(".comic_icon > div > a").attr("href"),
-                    )
-                    title = mangaElement.select(".mg_body > .title > a").text()
-                    // While the site does label who are the author and artists are, there is no formatting standard at all!
-                    // It becomes impossible to parse the names and their specific roles
-                    // So we are not going to process this at all
-                    author = mangaElement.select(".mg_description_header > .mg_author > a").text()
-                    // Nicovideo doesn't provide large thumbnails in their searches and manga listings unfortunately
-                    // A larger thumbnail is only available after going into the details page
-                    thumbnail_url = mangaElement.select(".comic_icon > div > a > img").attr("src")
-                    val statusText =
-                        mangaElement.select(".mg_description_header > .mg_icon > .content_status > span")
-                            .text()
-                    status = when (statusText) {
-                        "連載" -> {
-                            SManga.ONGOING
-                        }
-                        "完結" -> {
-                            SManga.COMPLETED
-                        }
-                        else -> {
-                            SManga.UNKNOWN
-                        }
-                    }
-                },
-            )
-        }
-        return MangasPage(mangas, mangaCount - mangaPerPage * currentPage > 0)
+                    title = it.title
+                    author = it.author
+                    // The thumbnail provided only displays a glimpse of the latest chapter. Not the actual cover
+                    // We can obtain a better thumbnail when the user clicks into the details
+                    thumbnail_url = it.thumbnailUrl
+                    // Store id only as we override the url down the chain
+                    url = it.id.toString()
+                }
+            },
+            pageNumber < 5,
+        )
    }

-    override fun latestUpdatesRequest(page: Int): Request =
-        GET("$baseUrl/manga/list?page=$page&sort=manga_updated")
-
-    override fun popularMangaParse(response: Response): MangasPage = latestUpdatesParse(response)
-
    override fun popularMangaRequest(page: Int): Request =
-        GET("$baseUrl/manga/list?page=$page&sort=manga_view")
+        // This is the only API call that doesn't use the API url
+        GET("$baseUrl/manga/ajax/ranking?span=total&category=all&page=$page", headers)
+
+    // Parses the common manga entry object from the api
+    private fun parseMangaEntry(entry: Manga): SManga {
+        return SManga.create().apply {
+            title = entry.meta.title
+            // The description is html. Simply using Jsoup to remove all the html tags
+            description = Jsoup.parse(entry.meta.description).wholeText()
+            // Although their API does contain individual author fields, they are arbitrary strings and we can't trust it conforms to a format
+            // Use display name instead which puts all of the people involved together
+            author = entry.meta.author
+            thumbnail_url = entry.meta.thumbnailUrl
+            // Store id only as we override the url down the chain
+            url = entry.id.toString()
+            status = when (entry.meta.serialStatus) {
+                "serial" -> SManga.ONGOING
+                "concluded" -> SManga.COMPLETED
+                else -> SManga.UNKNOWN
+            }
+            initialized = true
+        }
+    }

    override fun searchMangaParse(response: Response): MangasPage {
-        val currentPage = response.request.url.queryParameter("page")!!.toInt()
-        val doc = response.asJsoup()
-        val mangaCount =
-            doc.select("#mg_wrapper > div > div.header > div.header__result-summary").text().trim()
-                .split("：")[1].toInt()
-        val mangaPerPage = 20
-        val mangaList = doc.select(".search_result__item")
-        val mangas = ArrayList<SManga>()
-        for (manga in mangaList) {
-            mangas.add(
-                SManga.create().apply {
-                    setUrlWithoutDomain(
-                        baseUrl + manga.select(".search_result__item__thumbnail > a")
-                            .attr("href"),
-                    )
-                    title =
-                        manga.select(".search_result__item__info > .search_result__item__info--title > a")
-                            .text().trim()
-                    // While the site does label who the author and artists are, there is no formatting standard at all!
-                    // It becomes impossible to parse the names and their specific roles
-                    // So we are not going to process this at all
-                    author =
-                        manga.select(".search_result__item__info > .search_result__item__info--author")
-                            .text()
-                    // Nicovideo doesn't provide large thumbnails in their searches and manga listings unfortunately
-                    // A larger thumbnail/cover art is only available after going into the chapter listings
-                    thumbnail_url = manga.select(".search_result__item__thumbnail > a > img")
-                        .attr("data-original")
-                },
-            )
-        }
-        return MangasPage(mangas, mangaCount - mangaPerPage * currentPage > 0)
+        val r = json.decodeFromString<ApiResponse<Manga>>(response.body.string())
+        return MangasPage(r.data.result.map { parseMangaEntry(it) }, r.data.extra!!.hasNext!!)
    }

    override fun searchMangaRequest(page: Int, query: String, filters: FilterList): Request =
-        GET("$baseUrl/manga/search/?q=$query&page=$page&sort=score")
+        GET("$apiUrl/contents?mode=keyword&sort=score&q=$query&limit=20&offset=${(page - 1) * 20}", headers)

-    override fun mangaDetailsParse(response: Response): SManga = SManga.create().apply {
-        val doc = response.asJsoup()
-        // The description is a mix of synopsis and news announcements
-        // This is just how mangakas use this site
-        description =
-            doc.select("#contents > div.mg_work_detail > div > div.row > div.description_text")
-                .text()
-        // A better larger cover art is available here
-        thumbnail_url =
-            doc.select("#contents > div.primaries > div.main_visual > a > img").attr("src")
-        val statusText =
-            doc.select("#contents > div.mg_work_detail > div > div:nth-child(2) > div.tip.content_status.status_series > span")
-                .text()
-        status = when (statusText) {
-            "連載" -> {
-                SManga.ONGOING
-            }
-            "完結" -> {
-                SManga.COMPLETED
-            }
-            else -> {
-                SManga.UNKNOWN
-            }
-        }
+    override fun mangaDetailsParse(response: Response): SManga {
+        val r = json.decodeFromString<ApiResponse<Manga>>(response.body.string())
+        return parseMangaEntry(r.data.result.first())
+    }
+
+    override fun mangaDetailsRequest(manga: SManga): Request {
+        // Overwrite to use the API instead of scraping the shared URL
+        return GET("$apiUrl/contents/${manga.url}", headers)
+    }
+
+    override fun getMangaUrl(manga: SManga): String {
+        // Return functionality to WebView
+        return "$baseUrl/comic/${manga.url}"
    }

    override fun chapterListParse(response: Response): List<SChapter> {
-        val doc = response.asJsoup()
-        val chapters = ArrayList<SChapter>()
-        val chapterList = doc.select("#episode_list > ul > li")
-        val mangaId = response.request.url.toUrl().toString().substringAfterLast('/').substringBefore('?')
-        val sharedPref = application.getSharedPreferences("source_${id}_time_found:$mangaId", 0)
-        val editor = sharedPref.edit()
-        // After logging in, any chapters bought should show up as well
-        // Users will need to refresh their chapter list after logging in
-        for (chapter in chapterList) {
-            chapters.add(
+        val r = json.decodeFromString<ApiResponse<Chapter>>(response.body.string())
+        return r.data.result
+            // Chapter is unpublished by publishers from Niconico
+            // Either due to licensing issues or the publisher is withholding the chapter from selling
+            .filter { it.ownership.sellStatus != "publication_finished" }
+            .map { chapter ->
                SChapter.create().apply {
-                    // Unfortunately we cannot filter out promotional materials in the chapter list,
-                    // nor we can determine the chapter number from the title
-                    // That would require understanding the context of the title (See One Punch Man and Uzaki-chan for example)
-                    // Unless we have a machine learning algorithm in place, it's simply not possible
-                    name = chapter.select("div > div.description > div.title > a").text()
-                    setUrlWithoutDomain(
-                        baseUrl + chapter.select("div > div.description > div.title > a")
-                            .attr("href"),
-                    )
-                    // The data-number attribute is the only way we can determine chapter orders,
-                    // without that this extension would have been impossible to make
-                    // Note: Promotional materials also count as "chapters" here, so auto tracking unfortunately does not work at all
-                    chapter_number = chapter.select("div").attr("data-number").toFloat()
-                    // We can't determine the upload date from the website
-                    // Store date_upload when a chapter is found for the first time
-                    val dateFound = System.currentTimeMillis()
-                    if (!sharedPref.contains(chapter_number.toString())) {
-                        editor.putLong(chapter_number.toString(), dateFound)
-                    }
-                    date_upload = sharedPref.getLong(chapter_number.toString(), dateFound)
-                },
-            )
-        }
-        editor.apply()
-        chapters.sortByDescending { chapter -> chapter.chapter_number }
-        return chapters
+                    val isPaid = chapter.ownership.sellStatus == "selling"
+                    name = (if (isPaid) "\uD83D\uDCB4 " else "") + chapter.meta.title
+                    // Timestamp is in seconds, convert to milliseconds
+                    date_upload = chapter.meta.createdAt * 1000
+                    // While chapters are properly sorted, authors often add promotional material as "chapters" which breaks trackers
+                    // There's no way to properly filter these as they are treated the same as normal chapters
+                    chapter_number = chapter.meta.number.toFloat()
+                    // Store id only as we override the url down the chain
+                    url = chapter.id.toString()
+                }
+            }
+            .sortedByDescending { it.chapter_number }
+    }
+
+    override fun chapterListRequest(manga: SManga): Request {
+        // Overwrite to use the API instead of scraping the shared URL
+        return GET("$apiUrl/contents/${manga.url}/episodes", headers)
+    }
+
+    override fun getChapterUrl(chapter: SChapter): String {
+        return "$baseUrl/watch/mg${chapter.url}"
+    }
+
+    override fun fetchPageList(chapter: SChapter): Observable<List<Page>> {
+        return client.newCall(pageListRequest(chapter))
+            .asObservable()
+            .map { response ->
+                // Nicomanga historically refuses to serve pages if you don't login.
+                // However, due to the network attack against the site (as of July 2024) login is disabled
+                // Changes may be required as the site recovers
+                if (response.code == 403) {
+                    throw SecurityException("You need to purchase this chapter first")
+                }
+                if (response.code == 401) {
+                    throw SecurityException("Not logged in. Please login via WebView")
+                }
+                if (response.code != 200) {
+                    throw Exception("HTTP error ${response.code}")
+                }
+                pageListParse(response)
+            }
    }

    override fun pageListParse(response: Response): List<Page> {
-        val doc = response.asJsoup()
-        val pages = ArrayList<Page>()
-        // Nicovideo will refuse to serve any pages if the user has not logged in
-        if (!doc.select("#login_manga").isEmpty()) {
-            throw SecurityException("Not logged in. Please login via WebView first")
-        }
-        val pageList = doc.select("#page_contents > li")
-        for (page in pageList) {
-            val pageNumber = page.attr("data-page-index").toInt()
-            val url = page.select("div > img").attr("data-original")
-            pages.add(Page(pageNumber, url, url))
-        }
-        return pages
+        val r = json.decodeFromString<ApiResponse<Frame>>(response.body.string())
+        // Map the frames to pages
+        return r.data.result.mapIndexed { i, frame -> Page(i, frame.meta.sourceUrl, frame.meta.sourceUrl) }
+    }
+
+    override fun pageListRequest(chapter: SChapter): Request {
+        // Overwrite to use the API instead of scraping the shared URL
+        return GET("$apiUrl/episodes/${chapter.url}/frames?enable_webp=true", headers)
    }

    override fun imageRequest(page: Page): Request {
        // Headers are required to avoid cache miss from server side
        val headers = headersBuilder()
-            .set("referer", "https://seiga.nicovideo.jp/")
+            .set("referer", "$baseUrl/")
            .set("accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8")
            .set("pragma", "no-cache")
            .set("cache-control", "no-cache")
            .set("accept-encoding", "gzip, deflate, br")
-            .set(
-                "user-agent",
-                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
-            )
            .set("sec-fetch-dest", "image")
            .set("sec-fetch-mode", "no-cors")
            .set("sec-fetch-site", "cross-site")
@ -239,14 +204,14 @@ class NicovideoSeiga : HttpSource() {
        val decryptedImage = decryptImage(key, encryptedImage)

        // Construct a new response
-        val body = decryptedImage.toResponseBody("image/${getImageType(decryptedImage)}".toMediaTypeOrNull())
+        val body =
+            decryptedImage.toResponseBody("image/${getImageType(decryptedImage)}".toMediaType())
        return response.newBuilder().body(body).build()
    }

    /**
     * Paid images are xor encrypted in Nicovideo.
-     * The image url is displayed in the document in noscript environment
-     * It will look like the following:
+     * Take this example:
     * https://drm.cdn.nicomanga.jp/image/d952d4bc53ddcaafffb42d628239ebed4f66df0f_9477/12057916p.webp?1636382474
     *                                    ^^^^^^^^^^^^^^^^
     * The encryption key is stored directly on the URL. Up there. Yes, it stops right there