From d161dafd1799963c2169b013a840bac76429cb75 Mon Sep 17 00:00:00 2001 From: Federico d'Alonzo Date: Fri, 22 Dec 2023 18:30:58 +0100 Subject: [PATCH] Project Suki: Enhancements and fixes (#19323) * build(gradle)!: Migrate ProjectSuki build.gradle to kotlin dsl * feat: Add PathPattern * feat: Add DataExtractor * feat: Add ProjectSukiAPI * feat: Add ProjectSukiFilters * refactor!: migrate to new API and cleanup extension Completely replace NormalizedURL with HttpUrl, remove PS.kt, PSBook.kt and PSFilters.kt * chore(naming): rename pattern properties to be consistent * refactor(preferences): Centralize and cleanup preferences * chore(preferences): remove Android Preference import * refactor(everything): Fix most of everything Now apk builds, and correctly fetches books, chapters and images, including thumbnails. * revert(gradle): revert to build.gradle.kts to be consistent with other extensions as context receivers are still unusable * feat(url-activity): enhance Needs to be tested, got distracted * feat(preferences): Enhance preferences by providing more robust constructs * feat(filters): Update and enhance filters * feat(site-api): add search request data request and response parse * refactor: replace require and error with reportErrorToUser in PathPattern * refactor(core): Enhance everything Now extension will show browse results on popular, main page on latest, will default to actually-useful search (with naive option on older devices) while still allowing old search. Enhance user interaction by capturing or preventing almost all errors and alerting the user on what went wrong and what to do. * chore: Suppress warnings * docs: Document everything Add documentation and revise pretty much everything. 
* docs: Add CHANGELOG.md * docs: Add README.md * refactor(search-mode): Combine Naive/Full Site/Strict search options into single filter * revert(manifest): Remove android:icon it's set in the core AndroidManifest.xml * chore(lang): switch extension language to "all" explicitly set id: 8965918600406781666 * fix(preferences): fix blacklisted languages id was the same as whitelisted * fix: Fix bugs and more Change Naive to Simple, provide more understandable description, make it possible to use Simple mode on any Android version if one wishes to do so. Provide better regex for Simple search. Test chapter filtering, download (single chapters and multiple), all searches, chapter view. * docs: Update README and CHANGELOG * refactor(url-activity): Refactor Url Activity from kotlin to java Process kept complaining about java.lang.ClassNotFoundException: kotlin.jvm.internal.Intrinsics * revert(url-activity): Avoid kotlin Intrinsics --- src/all/projectsuki/AndroidManifest.xml | 29 +- src/all/projectsuki/CHANGELOG.md | 20 + src/all/projectsuki/README.md | 9 + src/all/projectsuki/build.gradle | 5 +- .../all/projectsuki/DataExtractor.kt | 902 ++++++++++++++++++ .../all/projectsuki/NormalizedURL.kt | 54 -- .../tachiyomi/extension/all/projectsuki/PS.kt | 129 --- .../extension/all/projectsuki/PSBook.kt | 11 - .../extension/all/projectsuki/PSFilters.kt | 90 -- .../extension/all/projectsuki/PathPattern.kt | 84 ++ .../extension/all/projectsuki/ProjectSuki.kt | 824 +++++++++------- .../all/projectsuki/ProjectSukiAPI.kt | 353 +++++++ .../all/projectsuki/ProjectSukiFilters.kt | 157 +++ .../all/projectsuki/ProjectSukiPreferences.kt | 117 +++ .../ProjectSukiSearchUrlActivity.kt | 67 ++ .../all/projectsuki/ProjectSukiUrlActivity.kt | 33 - 16 files changed, 2212 insertions(+), 672 deletions(-) create mode 100644 src/all/projectsuki/CHANGELOG.md create mode 100644 src/all/projectsuki/README.md create mode 100644 
src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt delete mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt delete mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt delete mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt delete mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt create mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt create mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSukiAPI.kt create mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSukiFilters.kt create mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSukiPreferences.kt create mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSukiSearchUrlActivity.kt delete mode 100644 src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSukiUrlActivity.kt diff --git a/src/all/projectsuki/AndroidManifest.xml b/src/all/projectsuki/AndroidManifest.xml index 867eb056f..6a117da70 100644 --- a/src/all/projectsuki/AndroidManifest.xml +++ b/src/all/projectsuki/AndroidManifest.xml @@ -2,33 +2,32 @@ + + + + - - + + + + - - + + - - - - + + + diff --git a/src/all/projectsuki/CHANGELOG.md b/src/all/projectsuki/CHANGELOG.md new file mode 100644 index 000000000..b36c92efd --- /dev/null +++ b/src/all/projectsuki/CHANGELOG.md @@ -0,0 +1,20 @@ +## Version 1.4.2 + +- Improved search feature +- New and improved Popular tab +- Old Popular tab moved to Latest +- Fixed chapter numbering issues when "Chapter" wasn't explicitly present (e.g. "Ch. 
2") +- Added chapter number inference for when the above fails +- Improved user feedback for errors and issues +- Fixed wording and clarity on most descriptions +- Added simple search option for Android API < 24 +- Chapter language will now appear right of the scan group +- Enhanced chapters sorting (number > group > language) +- Changed extension language from English to Multi + +## Version 1.4.1 + +First version of the extension: + +- basic functionality +- basic search, limited to full-site \ No newline at end of file diff --git a/src/all/projectsuki/README.md b/src/all/projectsuki/README.md new file mode 100644 index 000000000..5d44d2754 --- /dev/null +++ b/src/all/projectsuki/README.md @@ -0,0 +1,9 @@ +# Project Suki + +Go check out our general FAQs and Guides over at +[Extension FAQ](https://tachiyomi.org/help/faq/#extensions) or +[Getting Started](https://tachiyomi.org/help/guides/getting-started/#installation). + +If you still don't find the answer you're looking for you're welcome to open an +[issue](https://github.com/tachiyomiorg/tachiyomi-extensions/issues) +and mention [me](https://github.com/npgx/) *in the issue*. 
diff --git a/src/all/projectsuki/build.gradle b/src/all/projectsuki/build.gradle index 5ea5dc348..def93d091 100644 --- a/src/all/projectsuki/build.gradle +++ b/src/all/projectsuki/build.gradle @@ -1,15 +1,16 @@ apply plugin: 'com.android.application' apply plugin: 'kotlin-android' +apply plugin: 'kotlinx-serialization' ext { extName = 'Project Suki' pkgNameSuffix = 'all.projectsuki' extClass = '.ProjectSuki' - extVersionCode = 1 + extVersionCode = 2 } dependencies { - implementation(project(":lib-randomua")) + implementation project(":lib-randomua") } apply from: "$rootDir/common.gradle" diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt new file mode 100644 index 000000000..f9b775a70 --- /dev/null +++ b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt @@ -0,0 +1,902 @@ +package eu.kanade.tachiyomi.extension.all.projectsuki + +import okhttp3.HttpUrl +import okhttp3.HttpUrl.Companion.toHttpUrl +import okhttp3.HttpUrl.Companion.toHttpUrlOrNull +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element +import org.jsoup.select.Elements +import java.text.SimpleDateFormat +import java.util.Calendar +import java.util.Date +import java.util.EnumMap +import java.util.Locale +import java.util.TimeZone + +/** + * @see EXTENSION_INFO Found in ProjectSuki.kt + */ +@Suppress("unused") +private inline val INFO: Nothing get() = error("INFO") + +internal typealias BookID = String +internal typealias ChapterID = String +internal typealias ScanGroup = String + +/** + * Gets the thumbnail image for a particular [bookID], [extension] if needed and [size]. + * + * Not all URLs produced by this function might point to a valid asset. + */ +internal fun bookThumbnailUrl(bookID: BookID, extension: String, size: UInt? 
= null): HttpUrl { + return homepageUrl.newBuilder() + .addPathSegment("images") + .addPathSegment("gallery") + .addPathSegment(bookID) + .addPathSegment( + when { + size == null && extension.isBlank() -> "thumb" + size == null -> "thumb.$extension" + extension.isBlank() -> "$size-thumb" + else -> "$size-thumb.$extension" + }, + ) + .build() +} + +/** + * Finds the closest common parent between 2 or more [elements]. + * + * If all [elements] are the same element, it will return the element itself. + * + * Returns null if the [elements] are not in the same [Document]. + */ +internal fun commonParent(vararg elements: Element): Element? { + require(elements.size > 1) { "elements must have more than 1 element" } + + val parents: List> = elements.map { it.parents().reversed().iterator() } + var lastCommon: Element? = null + + while (true) { + val layer: MutableSet = parents.mapTo(HashSet()) { + if (it.hasNext()) it.next() else null + } + if (null in layer) break + if (layer.size != 1) break + lastCommon = layer.single() + } + + return lastCommon +} + +/** + * Simple Utility class that represents a switching point between 2 patterns given by a certain predicate (see [switchingPoints]). + * + * For example in the sequence 111001 there are 2 switching points, + * the first one is 10, at indexes 2 and 3, + * and the second one is 01 at indexes 4 and 5. + * + * Both indexes and states are given for absolute clarity. + */ +internal data class SwitchingPoint(val left: Int, val right: Int, val leftState: Boolean, val rightState: Boolean) { + init { + if (left + 1 != right) { + reportErrorToUser { + "invalid SwitchingPoint: ($left, $right)" + } + } + if (leftState == rightState) { + reportErrorToUser { + "invalid SwitchingPoint: ($leftState, $rightState)" + } + } + } +} + +/** + * Function that will return all [SwitchingPoint]s in a certain sequence. 
+ */ +internal fun Iterable.switchingPoints(predicate: (E) -> Boolean): List { + val iterator = iterator() + if (!iterator.hasNext()) return emptyList() + + val points: MutableList = ArrayList() + var state: Boolean = predicate(iterator.next()) + var index = 1 + for (element in iterator) { + val p = predicate(element) + if (state != p) { + points.add(SwitchingPoint(left = index - 1, right = index, leftState = state, rightState = p)) + state = p + } + index++ + } + + return points +} + +/** + * Utility class that can extract and format data from a certain [extractionElement]. + * + * Note that a [Document] is also an [Element]. + * + * The given [extractionElement] must have an [ownerDocument][Element.ownerDocument] with a valid absolute + * [location][Document.location] (according to [toHttpUrl]). + * + * [Lazy] properties are used to allow for the extraction process to happen only once + * (and for thread safety, see [LazyThreadSafetyMode], [lazy]). + * + * @author Federico d'Alonzo <me@npgx.dev> + */ +@Suppress("MemberVisibilityCanBePrivate") +class DataExtractor(val extractionElement: Element) { + + private val url: HttpUrl = extractionElement.ownerDocument()?.location()?.toHttpUrlOrNull() ?: reportErrorToUser { + buildString { + append("DataExtractor class requires a \"from\" element ") + append("that possesses an owner document with a valid absolute location(), but ") + append(extractionElement.ownerDocument()?.location()) + append(" was found!") + } + } + + /** + * All [anchor](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a) tags + * that have a valid url in the [href](https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/href) + * [attribute](https://developer.mozilla.org/en-US/docs/Glossary/Attribute). 
+ * + * To understand the [Element.select] methods, see [CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_selectors) + * and how to use them [to select DOM elements](https://developer.mozilla.org/en-US/docs/Web/API/Document_object_model/Locating_DOM_elements_using_selectors). + * + * JSoup's [Element.attr] methods support the special `abs:` syntax when working with relative URLs. + * It is simply a shortcut to [Element.absUrl], which uses [Document.baseUri]. + */ + val allHrefAnchors: Map by lazy { + buildMap { + extractionElement.select("a[href]").forEach { a -> + val href = a.attr("abs:href") + if (href.isNotBlank()) { + href.toHttpUrlOrNull() + ?.let { this[a] = it } + } + } + } + } + + /** + * Filters [allHrefAnchors] for urls that satisfy `url.host.endsWith(homepageUrl.host)`. + * + * Meaning this property contains only elements that redirect to a Project Suki URL. + */ + val psHrefAnchors: Map by lazy { + allHrefAnchors.filterValues { url -> + url.host.endsWith(homepageUrl.host) + } + } + + /** Utility class that represents a "book" element, identified by the [bookID]. */ + data class PSBook(val thumbnail: HttpUrl, val rawTitle: String, val bookUrl: HttpUrl, val bookID: BookID) { + override fun equals(other: Any?) = other is PSBook && this.bookID == other.bookID + override fun hashCode() = bookID.hashCode() + } + + /** + * This property contains all the [books][PSBook] contained in the [extractionElement]. + * + * Extraction is done by first obtaining all [psHrefAnchors], and using some heuristics + * to find the [PSBook.rawTitle] and [PSBook.thumbnail]'s extension. + * + * Other extensions might use CSS Selectors (see [DataExtractor]) to find these values in a fixed structure. + * But because [Project Suki](https://projectsuki.com) seems to be done by hand using [Bootstrap](https://getbootstrap.com/), + * it has a much more volatile structure. 
+ * + * To make it possible to maintain this extension, data extraction is done by finding all elements in the page that redirect to + * book entries, and using generalized heuristics that should be robust to some types of changes. + * This has the disadvantage of making distinguishing between the different elements in a single page a nightmare, + * but luckily we don't need to do that for the purposes of a Tachiyomi extension. + */ + val books: Set by lazy { + buildSet { + data class BookUrlContainerElement(val container: Element, val href: HttpUrl, val matchResult: PathMatchResult) + + psHrefAnchors.entries + .map { (element, href) -> BookUrlContainerElement(element, href, href.matchAgainst(bookUrlPattern)) } + .filter { it.matchResult.doesMatch } + .groupBy { it.matchResult["bookid"]!!.value } + .forEach { (bookID: BookID, containers: List) -> + + val extension: String = containers.asSequence() + .flatMap { it.container.select("img") } + .mapNotNull { it.imageSrc() } + .map { it.matchAgainst(thumbnailUrlPattern) } + .filter { it.doesMatch } + .firstOrNull() + ?.get("thumbextension") + ?.value ?: "" + + val title: String = containers.asSequence() + .map { it.container } + .filter { it.select("img").isEmpty() } + .filter { it.parents().none { p -> p.tag().normalName() == "small" } } + .map { it.ownText() } + .filter { !it.equals("show more", ignoreCase = true) } + .firstOrNull() ?: reportErrorToUser { "Could not determine title for $bookID" } + + add( + PSBook( + thumbnail = bookThumbnailUrl(bookID, extension), + rawTitle = title, + bookUrl = homepageUrl.newBuilder() + .addPathSegment("book") + .addPathSegment(bookID) + .build(), + bookID = bookID, + ), + ) + } + } + } + + /** Utility class that extends [PSBook], by providing a [detailsTable], [alertData] and [description]. */ + data class PSBookDetails( + val book: PSBook, + val detailsTable: EnumMap, + val alertData: List, + val description: String, + ) { + override fun equals(other: Any?) 
= other is PSBookDetails && this.book == other.book + override fun hashCode() = book.hashCode() + } + + /** + * Represents a plethora of possibly-present data about some book. + * + * The process for extracting the details is described in the KDoc for [bookDetails]. + */ + @Suppress("RegExpUnnecessaryNonCapturingGroup") + enum class BookDetail(val display: String, val regex: Regex, val elementProcessor: (Element) -> String = { it.text() }) { + ALT_TITLE("Alt titles:", """(?:alternative|alt\.?) titles?:?""".toRegex(RegexOption.IGNORE_CASE)), + AUTHOR("Authors:", """authors?:?""".toRegex(RegexOption.IGNORE_CASE)), + ARTIST("Artists:", """artists?:?""".toRegex(RegexOption.IGNORE_CASE)), + STATUS("Status:", """status:?""".toRegex(RegexOption.IGNORE_CASE)), + ORIGIN("Origin:", """origin:?""".toRegex(RegexOption.IGNORE_CASE)), + RELEASE_YEAR("Release year:", """release(?: year):?""".toRegex(RegexOption.IGNORE_CASE)), + USER_RATING( + "User rating:", + """user ratings?:?""".toRegex(RegexOption.IGNORE_CASE), + elementProcessor = { ratings -> + val rates = when { + ratings.id() != "ratings" -> 0 + else -> ratings.children().count { it.hasClass("text-warning") } + } + + when (rates) { + in 1..5 -> "$rates/5" + else -> "?/5" + } + }, + ), + VIEWS("Views:", """views?:?""".toRegex(RegexOption.IGNORE_CASE)), + OFFICIAL("Official:", """official:?""".toRegex(RegexOption.IGNORE_CASE)), + PURCHASE("Purchase:", """purchase:?""".toRegex(RegexOption.IGNORE_CASE)), + GENRE("Genres:", """genre(?:\(s\))?:?""".toRegex(RegexOption.IGNORE_CASE)), + ; + + companion object { + private val values = values().toList() + fun from(type: String): BookDetail? = values.firstOrNull { it.regex.matches(type) } + } + } + + /** Used to detect visible/invisible alerts. */ + private val displayNoneRegex = """display: ?none;?""".toRegex(RegexOption.IGNORE_CASE) + + /** + * All [details][PSBookDetails] are extracted from a table-like list of `
` elements, + * found in the book main page, using generalized heuristics: + * + * First the algorithm looks for known entries in the "table" by looking for + * the [Status][BookDetail.STATUS] and [Origin][BookDetail.ORIGIN] fields. + * This is possible because these elements redirect to the [search](https://projectsuki.com/search) + * page with "status" and "origin" queries. + * + * The [commonParent] between the two elements is found and the table is subsequently analyzed. + * If this method fails, at least the [Author][BookDetail.AUTHOR], [Artist][BookDetail.ARTIST] and [Genre][BookDetail.GENRE] + * details are found via URLs. + * + * An extra [Genre][BookDetail.GENRE] is added when possible: + * - Origin: "kr" -> Genre: "Manhwa" + * - Origin: "cn" -> Genre: "Manhua" + * - Origin: "jp" -> Genre: "Manga" + * + * The book title, description and alerts are also found in similar ways. + * + * The description is expanded with all this information too. + */ + val bookDetails: PSBookDetails by lazy { + val match = url.matchAgainst(bookUrlPattern) + if (!match.doesMatch) reportErrorToUser { "cannot extract book details: $url" } + val bookID = match["bookid"]!!.value + + val authors: Map = psHrefAnchors.filter { (_, url) -> + url.queryParameterNames.contains("author") + } + + val artists: Map = psHrefAnchors.filter { (_, url) -> + url.queryParameterNames.contains("artist") + } + + val status: Map.Entry = psHrefAnchors.entries.single { (_, url) -> + url.queryParameterNames.contains("status") + } + + val origin: Map.Entry = psHrefAnchors.entries.single { (_, url) -> + url.queryParameterNames.contains("origin") + } + + val genres: Map = psHrefAnchors.filter { (_, url) -> + url.matchAgainst(genreSearchUrlPattern).doesMatch + } + + val details = EnumMap(BookDetail::class.java) + val tableParent: Element? = commonParent(status.key, origin.key) + val rows: List? 
= tableParent?.children()?.toList() + + for (row in (rows ?: emptyList())) { + val cols = row.children() + val typeElement = cols.getOrNull(0) ?: continue + val valueElement = cols.getOrNull(1) ?: continue + + val typeText = typeElement.text() + val detail = BookDetail.from(typeText) ?: continue + + details[detail] = detail.elementProcessor(valueElement) + } + + details.getOrPut(BookDetail.AUTHOR) { authors.keys.joinToString(", ") { it.text() } } + details.getOrPut(BookDetail.ARTIST) { artists.keys.joinToString(", ") { it.text() } } + details.getOrPut(BookDetail.STATUS) { status.key.text() } + details.getOrPut(BookDetail.ORIGIN) { origin.key.text() } + + details.getOrPut(BookDetail.GENRE) { genres.keys.joinToString(", ") { it.text() } } + + when (origin.value.queryParameter("origin")) { + "kr" -> "Manhwa" + "cn" -> "Manhua" + "jp" -> "Manga" + else -> null + }?.let { originGenre -> + details[BookDetail.GENRE] = """${details[BookDetail.GENRE]}, $originGenre""" + } + + val title: Element? 
= extractionElement.selectFirst("h2[itemprop=title]") ?: extractionElement.selectFirst("h2") ?: run { + // the common table is inside of a "row" wrapper that is the neighbour of the h2 containing the title + // if we sort of generalize this, the title should be the first + // text-node-bearing child of the table's grandparent + tableParent?.parent()?.parent()?.children()?.firstOrNull { it.textNodes().isNotEmpty() } + } + + val alerts: List = extractionElement.select(".alert, .alert-info") + .asSequence() + .filter { !it.attr("style").contains(displayNoneRegex) } + .filter { alert -> alert.parents().none { it.attr("style").contains(displayNoneRegex) } } + .map { alert -> + buildString { + var appendedSomething = false + alert.select("h4").singleOrNull()?.let { + appendLine(it.wholeText()) + appendedSomething = true + } + alert.select("p").singleOrNull()?.let { + appendLine(it.wholeText()) + appendedSomething = true + } + if (!appendedSomething) { + appendLine(alert.wholeText()) + } + } + } + .toList() + + val description = extractionElement.selectFirst("#descriptionCollapse") + ?.wholeText() ?: extractionElement.select(".description") + .joinToString("\n\n", postfix = "\n") { it.wholeText() } + + val extension = extractionElement.select("img") + .asSequence() + .mapNotNull { e -> e.imageSrc()?.let { e to it } } + .map { (img, src) -> img to src.matchAgainst(thumbnailUrlPattern) } + .filter { (_, match) -> match.doesMatch } + .firstOrNull() + ?.second + ?.get("thumbextension") + ?.value ?: "" + + PSBookDetails( + book = PSBook( + bookThumbnailUrl(bookID, extension), + title?.text() ?: reportErrorToUser { "could not determine book title from details for $bookID" }, + url, + bookID, + ), + detailsTable = details, + alertData = alerts, + description = description, + ) + } + + /** Represents some data type that a certain column in the chapters table represents. 
*/ + sealed class ChaptersTableColumnDataType(val required: Boolean) { + + /** @return true if this data type is represented by a column's raw title. */ + abstract fun isRepresentedBy(from: String): Boolean + + /** Represents the chapter's title, which also normally includes the chapter number. */ + /*data*/ object Chapter : ChaptersTableColumnDataType(required = true) { + private val chapterHeaderRegex = """chapters?""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(chapterHeaderRegex) + } + + /** Represents the chapter's scan group. */ + /*data*/ object Group : ChaptersTableColumnDataType(required = true) { + private val groupHeaderRegex = """groups?""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(groupHeaderRegex) + } + + /** Represents the chapter's release date (when it was added to the site). */ + /*data*/ object Added : ChaptersTableColumnDataType(required = true) { + private val dateHeaderRegex = """added|date""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(dateHeaderRegex) + } + + /** Represents the chapter's language. */ + /*data*/ object Language : ChaptersTableColumnDataType(required = false) { + private val languageHeaderRegex = """language""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(languageHeaderRegex) + } + + /** Represents the chapter's view count. 
*/ + /*data*/ object Views : ChaptersTableColumnDataType(required = false) { + @Suppress("RegExpUnnecessaryNonCapturingGroup") + private val languageHeaderRegex = """views?(?:\s*count)?""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(languageHeaderRegex) + } + + companion object { + val all: Set by lazy { setOf(Chapter, Group, Added, Language, Views) } + val required: Set by lazy { all.filterTo(LinkedHashSet()) { it.required } } + + /** + * Takes the list of [headers] and returns a map that + * represents which data type is contained in which column index. + * + * Not all column indexes might be present if some column isn't recognised as a data type listed above. + */ + fun extractDataTypes(headers: List): Map { + return buildMap { + headers.map { it.text() } + .forEachIndexed { columnIndex, columnHeaderText -> + all.forEach { dataType -> + if (dataType.isRepresentedBy(columnHeaderText)) { + put(dataType, columnIndex) + } + } + } + } + } + } + } + + /** Represents a book's chapter. */ + data class BookChapter( + val chapterUrl: HttpUrl, + val chapterMatchResult: PathMatchResult, + val chapterTitle: String, + val chapterNumber: ChapterNumber?, + val chapterGroup: ScanGroup, + val chapterDateAdded: Date?, + val chapterLanguage: String, + ) { + + @Suppress("unused") + val bookID: BookID = chapterMatchResult["bookid"]!!.value + + @Suppress("unused") + val chapterID: ChapterID = chapterMatchResult["chapterid"]!!.value + } + + /** + * This property contains all the [BookChapter]s contained in the [extractionElement], grouped by the [ScanGroup]. + * + * The extraction proceeds by first finding all `<table>` elements and then progressively refines + * the extracted data to remove false positives, combining all the extracted data and removing duplicates at the end. + * + * The `<thead>` element is analyzed to find the corresponding data types, this is resistant to shuffles + * (e.g. 
if the Chapter and Language columns are swapped, this will work anyways). + * + * Then the `<tbody>` rows (`<tr>`) are one by one processed to find the ones that match the column (`<td>
`) + * size and data type positions that we care about. + */ + val bookChapters: Map> by lazy { + data class RawTable(val self: Element, val thead: Element, val tbody: Element) + data class AnalyzedTable(val raw: RawTable, val columnDataTypes: Map, val dataRows: List) + + val allChaptersByGroup: MutableMap> = extractionElement.select("table") + .asSequence() + .mapNotNull { tableElement -> + tableElement.selectFirst("thead")?.let { thead -> + tableElement.selectFirst("tbody")?.let { tbody -> + RawTable(tableElement, thead, tbody) + } + } + } + .mapNotNull { rawTable -> + val (_: Element, theadElement: Element, tbodyElement: Element) = rawTable + + val columnDataTypes: Map = theadElement.select("tr").asSequence() + .mapNotNull { headerRow -> + ChaptersTableColumnDataType.extractDataTypes(headers = headerRow.select("td")) + .takeIf { it.keys.containsAll(ChaptersTableColumnDataType.required) } + } + .firstOrNull() ?: return@mapNotNull null + + val dataRows: List = tbodyElement.select("tr") + .asSequence() + .map { it.children() } + .filter { it.size == columnDataTypes.size } + .toList() + + AnalyzedTable(rawTable, columnDataTypes, dataRows) + } + .map { analyzedTable -> + val (_: RawTable, columnDataTypes: Map, dataRows: List) = analyzedTable + + val rawData: List> = dataRows.map { row -> + columnDataTypes.mapValues { (_, columnIndex) -> + row[columnIndex] + } + } + + val rawByGroup: Map>> = rawData.groupBy { data -> + data[ChaptersTableColumnDataType.Group]!!.text() + } + + val chaptersByGroup: Map> = rawByGroup.mapValues { (groupName, chapters: List>) -> + chapters.map { data: Map -> + val chapterElement: Element = data[ChaptersTableColumnDataType.Chapter]!! + val addedElement: Element = data[ChaptersTableColumnDataType.Added]!! + val languageElement: Element? 
= data[ChaptersTableColumnDataType.Language] + // val viewsElement = data[ChaptersTableColumnDataType.Views] + + val chapterUrl: HttpUrl = (chapterElement.selectFirst("a[href]") ?: reportErrorToUser { "Could not determine chapter url for ${chapterElement.text()}" }) + .attr("abs:href") + .toHttpUrl() + val chapterUrlMatch: PathMatchResult = chapterUrl.matchAgainst(chapterUrlPattern) + + val chapterNumber: ChapterNumber? = chapterElement.text().tryAnalyzeChapterNumber() + val dateAdded: Date? = addedElement.text().tryAnalyzeChapterDate() + val chapterLanguage: String = languageElement?.text()?.trim()?.lowercase(Locale.US) ?: UNKNOWN_LANGUAGE + + BookChapter( + chapterUrl = chapterUrl, + chapterMatchResult = chapterUrlMatch, + chapterTitle = chapterElement.text(), + chapterNumber = chapterNumber, + chapterGroup = groupName, + chapterDateAdded = dateAdded, + chapterLanguage = chapterLanguage, + ) + } + } + + chaptersByGroup + } + .map { chaptersByGroup -> + chaptersByGroup.mapValues { (_, chapters) -> + chapters.tryInferMissingChapterNumbers() + } + } + .fold(LinkedHashMap()) { map, next -> + map.apply { + next.forEach { (group, chapters) -> + getOrPut(group) { ArrayList() }.addAll(chapters) + } + } + } + + allChaptersByGroup + } + + /** + * Utility class that represents a chapter number. + * + * Ordering is implemented in the way a human would most likely expect chapters to be ordered, + * e.g. chapter 10.15 comes after chapter 10.9 + */ + data class ChapterNumber(val main: UInt, val sub: UInt) : Comparable { + override fun compareTo(other: ChapterNumber): Int = comparator.compare(this, other) + + companion object { + val comparator: Comparator by lazy { compareBy({ it.main }, { it.sub }) } + val chapterNumberRegex: Regex = """(?:chapter|ch\.?)\s*(\d+)(?:\s*[.,-]\s*(\d+)?)?""".toRegex(RegexOption.IGNORE_CASE) + } + } + + /** Tries to infer the chapter number from the raw title. */ + private fun String.tryAnalyzeChapterNumber(): ChapterNumber? 
{ + return ChapterNumber.chapterNumberRegex + .find(this) + ?.let { simpleMatchResult -> + val main: UInt = simpleMatchResult.groupValues[1].toUInt() + val sub: UInt = simpleMatchResult.groupValues[2].takeIf { it.isNotBlank() }?.toUInt() ?: 0u + + ChapterNumber(main, sub) + } + } + + /** + * Represents an index where the chapter number is unknown and + * whether or not the previous (above, next numerical chapter) + * or next (below, previous numerical chapter) chapter numbers + * are known. + * + * Requires [aboveIsKnown] or [belowIsKnown] to be true (or both). + */ + data class MissingChapterNumberEdge(val index: Int, val aboveIsKnown: Boolean, val belowIsKnown: Boolean) { + init { + require(aboveIsKnown || belowIsKnown) { "previous or next index must be known (or both)" } + } + } + + /** + * Chapter titles usually contain "Chapter xx" or "Ch. xx", but to provide some way to patch + * eventual holes (which happened before with "Ch." which wasn't accounted for), this method is provided. + * + * The algorithm tries to infer the chapter numbers by using correctly + * inferred zones and expanding them. + * + * The theoretical behaviour of this algorithm can easily be represented by + * using + for known and - for unknown chapter numbers + * (think of a 1D cellular automaton with very simple rules). + * An example (coarse) timeline could look like this: + * ``` + * -++--++---+-+++-- + * ++++++++-+++++++- + * +++++++++++++++++ + * ``` + * The actual changes always happen in a loop-like behaviour from left to right. + * We can use this to our advantage. + * + * Inference is done on a best-guess basis based on neighbouring values. + * Reporting to the user is preferred to avoid providing weird values. 
+     */
+    private fun List<BookChapter>.tryInferMissingChapterNumbers(): List<BookChapter> {
+        if (isEmpty()) return emptyList()
+
+        // every place where two neighbouring chapters differ in "chapter number is known"
+        val knownnessSwitches = switchingPoints { it.chapterNumber != null }
+
+        when {
+            knownnessSwitches.isEmpty() && first().chapterNumber == null -> {
+                // oh dear, nothing is known
+                reportErrorToUser { "No chapter numbers could be inferred!" }
+            }
+
+            knownnessSwitches.isEmpty() /* && first().chapterNumber != null */ -> {
+                // all are known
+                return this
+            }
+        }
+
+        // Work list of indices whose chapter number is unknown but that touch a known neighbour.
+        // Chapters go in inverse order: "above" (index - 1) is the numerically next chapter,
+        // "below" (index + 1) is the numerically previous one.
+        // Only the index is stored: which neighbours are known is looked up when the entry is
+        // processed, so an entry can never carry outdated information about its surroundings.
+        val pending: ArrayDeque<Int> = ArrayDeque()
+        knownnessSwitches.forEach { (left, right, leftIsKnown, _) ->
+            // exactly one side of a switching point is known, the other one is our subject.
+            // an unknown chapter surrounded by known ones is simply queued twice,
+            // the second entry is skipped once the first one has been handled.
+            pending.addLast(if (leftIsKnown) right else left)
+        }
+
+        // previous chapter number
+        fun ChapterNumber.predictBelow(): ChapterNumber = when (sub) {
+            0u -> when (main) {
+                // UInt arithmetic wraps around: 0u - 1u would silently become chapter 4294967295
+                0u -> reportErrorToUser { "Chapter number inference failed (nothing below chapter 0)!" }
+                else -> ChapterNumber(main - 1u, 0u) // before chapter 18, chapter 17
+            }
+
+            5u -> ChapterNumber(main, 0u) // before chapter 18.5, chapter 18
+            else -> ChapterNumber(main, sub - 1u) // before chapter 18.4, chapter 18.3
+        }
+
+        // next chapter number
+        fun ChapterNumber.predictAbove(): ChapterNumber = when (sub) {
+            0u, 5u -> ChapterNumber(main + 1u, 0u) // after chapter 17 or 17.5, chapter 18
+            else -> ChapterNumber(main, sub + 1u) // after chapter 18.3, 18.4
+        }
+
+        // chapter number for an unknown chapter that has known numbers on both sides
+        fun inferBetween(above: ChapterNumber, below: ChapterNumber): ChapterNumber {
+            if (above == below) reportErrorToUser { "Chapter number inference failed (case 0)!" }
+            if (above < below) reportErrorToUser { "Chapter number inference failed (case 1)!" }
+
+            val inferredByDecreasing = above.predictBelow()
+            val inferredByIncreasing = below.predictAbove()
+
+            return when {
+                // inference agrees from both sides
+                inferredByDecreasing == inferredByIncreasing -> inferredByDecreasing
+
+                // the predictions would collide with (or overshoot) the known neighbours
+                inferredByIncreasing >= above || inferredByDecreasing <= below -> {
+                    reportErrorToUser { "Chapter number inference failed (case 2)!" }
+                }
+
+                // gap between chapters, take the lowest
+                inferredByDecreasing > inferredByIncreasing -> inferredByIncreasing
+
+                // everything else should be reported to user
+                else -> reportErrorToUser { "Chapter number inference failed (case 3)!" }
+            }
+        }
+
+        val result: MutableList<BookChapter> = ArrayList(this)
+
+        // null both for "out of the list" and for "still unknown"
+        fun knownNumberAt(index: Int): ChapterNumber? = result.getOrNull(index)?.chapterNumber
+
+        while (pending.isNotEmpty()) {
+            val index: Int = pending.removeFirst()
+
+            // duplicate or outdated entry: never touch a number that is already known
+            if (index !in result.indices || result[index].chapterNumber != null) continue
+
+            val above: ChapterNumber? = knownNumberAt(index - 1)
+            val below: ChapterNumber? = knownNumberAt(index + 1)
+
+            val inferred: ChapterNumber = when {
+                above != null && below != null -> inferBetween(above, below)
+                above != null -> above.predictBelow()
+                below != null -> below.predictAbove()
+                else -> {
+                    // shouldn't be possible, entries are only queued next to a known chapter
+                    reportErrorToUser { "Chapter number inference failed (case 4)!" }
+                }
+            }
+
+            result[index] = result[index].copy(chapterNumber = inferred)
+
+            // expand the known zone: neighbours that exist and are still unknown come next
+            if (above == null && (index - 1) in result.indices) pending.addLast(index - 1)
+            if (below == null && (index + 1) in result.indices) pending.addLast(index + 1)
+        }
+
+        return result
+    }
+
+    /**
+     * ThreadLocal [SimpleDateFormat] (SimpleDateFormat is not thread safe).
+     *
+     * Used for absolute dates written as `MMMM dd, yyyy`, always parsed with [Locale.US].
+     */
+    private val absoluteDateFormat: ThreadLocal<SimpleDateFormat> = object : ThreadLocal<SimpleDateFormat>() {
+        override fun initialValue() = runCatching { SimpleDateFormat("MMMM dd, yyyy", Locale.US) }.fold(
+            onSuccess = { it },
+            onFailure = { reportErrorToUser { "Invalid SimpleDateFormat(MMMM dd, yyyy)" } },
+        )
+    }
+
+    /** Matches relative dates such as `3 days ago`: group 1 is the amount, group 2 the unit (case-insensitive). */
+    private val relativeChapterDateRegex = """(\d+)\s+(years?|months?|weeks?|days?|hours?|mins?|minutes?|seconds?|sec)\s+ago""".toRegex(RegexOption.IGNORE_CASE)
+
+    /**
+     * Tries to parse a possibly human-readable relative [Date].
+     *
+     * Surrounding whitespace is ignored. Text matching [relativeChapterDateRegex] is subtracted
+     * from the current time, anything else is parsed with [absoluteDateFormat];
+     * text that is neither is reported to the user.
+     *
+     * @see Calendar
+     */
+    private fun String.tryAnalyzeChapterDate(): Date? {
+        // trim once, so that the relative and the absolute path look at the same text
+        val text: String = trim()
+
+        return when (val match = relativeChapterDateRegex.matchEntire(text)) {
+            null -> {
+                absoluteDateFormat.get()
+                    .runCatching { this!!.parse(text) }
+                    .fold(
+                        onSuccess = { it },
+                        onFailure = { reportErrorToUser { "Could not parse date: $this" } },
+                    )
+            }
+
+            else -> {
+                // relative
+                val number: Int = match.groupValues[1].toInt()
+                // the regex ignores case ("3 Days ago" matches), the comparisons below do not
+                val relativity: String = match.groupValues[2].lowercase(Locale.US)
+                val cal: Calendar = Calendar.getInstance(TimeZone.getDefault(), Locale.US)
+
+                when {
+                    relativity.startsWith("year") -> cal.add(Calendar.YEAR, -number)
+                    relativity.startsWith("month") -> cal.add(Calendar.MONTH, -number)
+                    relativity.startsWith("week") -> cal.add(Calendar.DAY_OF_MONTH, -number * 7)
+                    relativity.startsWith("day") -> cal.add(Calendar.DAY_OF_MONTH, -number)
+                    relativity.startsWith("hour") -> cal.add(Calendar.HOUR, -number)
+                    relativity.startsWith("min") -> cal.add(Calendar.MINUTE, -number)
+                    relativity.startsWith("sec") -> cal.add(Calendar.SECOND, -number)
+                    // the regex only lets the units above through, never return "now" by accident
+                    else -> reportErrorToUser { "Could not parse date: $this" }
+                }
+
+                cal.time
+            }
+        }
+    }
+}
diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt
deleted file mode 100644
index 3f6840dde..000000000
--- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt
+++ /dev/null
@@ -1,54 +0,0 @@
-package eu.kanade.tachiyomi.extension.all.projectsuki
-
-import okhttp3.HttpUrl -import okhttp3.HttpUrl.Companion.toHttpUrl -import okhttp3.HttpUrl.Companion.toHttpUrlOrNull -import org.jsoup.nodes.Element - -typealias NormalizedURL = HttpUrl - -val NormalizedURL.rawAbsolute: String - get() = toString() - -private val psDomainURI = """https://projectsuki.com/""".toHttpUrl().toUri() - -val NormalizedURL.rawRelative: String? - get() { - val uri = toUri() - return psDomainURI - .relativize(uri) - .takeIf { it != uri } - ?.let { """/$it""" } - } - -private val protocolMatcher = """^https?://""".toRegex() -private val domainMatcher = """^https?://(?:[a-zA-Z\d\-]+\.)+[a-zA-Z\d\-]+""".toRegex() -fun String.toNormalURL(): NormalizedURL? { - if (contains(':') && !contains(protocolMatcher)) { - return null - } - - val toParse = StringBuilder() - - if (!contains(domainMatcher)) { - toParse.append("https://projectsuki.com") - if (!this.startsWith("/")) toParse.append('/') - } - - toParse.append(this) - - return toParse.toString().toHttpUrlOrNull() -} - -fun NormalizedURL.pathStartsWith(other: Iterable): Boolean = pathSegments.zip(other).all { (l, r) -> l == r } - -fun NormalizedURL.isPSUrl() = host.endsWith("${PS.identifier}.com") - -fun NormalizedURL.isBookURL() = isPSUrl() && pathSegments.first() == "book" -fun NormalizedURL.isReadURL() = isPSUrl() && pathStartsWith(PS.chapterPath) -fun NormalizedURL.isImagesGalleryURL() = isPSUrl() && pathStartsWith(PS.pagePath) - -fun Element.attrNormalizedUrl(attrName: String): NormalizedURL? 
{ - val attrValue = attr("abs:$attrName").takeIf { it.isNotBlank() } ?: return null - return attrValue.toNormalURL() -} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt deleted file mode 100644 index 68c4dd935..000000000 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt +++ /dev/null @@ -1,129 +0,0 @@ -@file:Suppress("MayBeConstant", "unused") - -package eu.kanade.tachiyomi.extension.all.projectsuki - -import org.jsoup.nodes.Element -import java.util.Calendar -import java.util.Locale -import kotlin.concurrent.getOrSet - -@Suppress("MemberVisibilityCanBePrivate") -internal object PS { - const val identifier: String = "projectsuki" - const val identifierShort: String = "ps" - - val bookPath = listOf("book") - val pagePath = listOf("images", "gallery") - val chapterPath = listOf("read") - - const val SEARCH_INTENT_PREFIX: String = "$identifierShort:" - - const val PREFERENCE_WHITELIST_LANGUAGES = "$identifier-languages-whitelist" - const val PREFERENCE_WHITELIST_LANGUAGES_TITLE = "Whitelist the following languages:" - const val PREFERENCE_WHITELIST_LANGUAGES_SUMMARY = - "Will keep project chapters in the following languages." + - " Takes precedence over blacklisted languages." + - " It will match the string present in the \"Language\" column of the chapter." + - " Whitespaces will be trimmed." + - " Leave empty to allow all languages." + - " Separate each entry with a comma ','" - - const val PREFERENCE_BLACKLIST_LANGUAGES = "$identifier-languages-blacklist" - const val PREFERENCE_BLACKLIST_LANGUAGES_TITLE = "Blacklist the following languages:" - const val PREFERENCE_BLACKLIST_LANGUAGES_SUMMARY = - "Will hide project chapters in the following languages." + - " Works identically to whitelisting." 
-} - -fun Element.containsBookLinks(): Boolean = select("a").any { - it.attrNormalizedUrl("href")?.isBookURL() == true -} - -fun Element.containsReadLinks(): Boolean = select("a").any { - it.attrNormalizedUrl("href")?.isReadURL() == true -} - -fun Element.containsImageGalleryLinks(): Boolean = select("a").any { - it.attrNormalizedUrl("href")?.isImagesGalleryURL() == true -} - -fun Element.getAllUrlElements(selector: String, attrName: String, predicate: (NormalizedURL) -> Boolean): Map { - return select(selector) - .mapNotNull { element -> element.attrNormalizedUrl(attrName)?.let { element to it } } - .filter { (_, url) -> predicate(url) } - .toMap() -} - -fun Element.getAllBooks(): Map { - val bookUrls = getAllUrlElements("a", "href") { it.isBookURL() } - val byID: Map> = bookUrls.groupBy { (_, url) -> url.pathSegments[1] /* /book/ */ } - - @Suppress("UNCHECKED_CAST") - return byID.mapValues { (bookid, elements) -> - val thumb: Element? = elements.entries.firstNotNullOfOrNull { (element, _) -> - element.select("img").firstOrNull() - } - val title = elements.entries.firstOrNull { (element, _) -> - element.select("img").isEmpty() && element.text().let { - it.isNotBlank() && it.lowercase(Locale.US) != "show more" - } - } - - if (thumb != null && title != null) { - PSBook(thumb, title.key, title.key.text(), bookid, title.value) - } else { - null - } - }.filterValues { it != null } as Map -} - -inline fun Map.groupBy(keySelector: (Map.Entry) -> SK): Map> = buildMap<_, MutableMap> { - this@groupBy.entries.forEach { entry -> - getOrPut(keySelector(entry)) { HashMap() }[entry.key] = entry.value - } -} - -private val absoluteDateFormat: ThreadLocal = ThreadLocal() -fun String.parseDate(ifFailed: Long = 0L): Long { - return when { - endsWith("ago") -> { - // relative - val number = takeWhile { it.isDigit() }.toInt() - val cal = Calendar.getInstance() - - when { - contains("day") -> cal.apply { add(Calendar.DAY_OF_MONTH, -number) } - contains("hour") -> cal.apply { 
add(Calendar.HOUR, -number) } - contains("minute") -> cal.apply { add(Calendar.MINUTE, -number) } - contains("second") -> cal.apply { add(Calendar.SECOND, -number) } - contains("week") -> cal.apply { add(Calendar.DAY_OF_MONTH, -number * 7) } - contains("month") -> cal.apply { add(Calendar.MONTH, -number) } - contains("year") -> cal.apply { add(Calendar.YEAR, -number) } - else -> null - }?.timeInMillis ?: ifFailed - } - - else -> { - // absolute? - absoluteDateFormat.getOrSet { java.text.SimpleDateFormat("MMMM dd, yyyy", Locale.US) }.parse(this)?.time ?: ifFailed - } - } -} - -private val imageExtensions = setOf(".jpg", ".png", ".jpeg", ".webp", ".gif", ".avif", ".tiff") -private val simpleSrcVariants = listOf("src", "data-src", "data-lazy-src") -fun Element.imgNormalizedURL(): NormalizedURL? { - simpleSrcVariants.forEach { variant -> - if (hasAttr(variant)) { - return attrNormalizedUrl(variant) - } - } - - if (hasAttr("srcset")) { - return attr("abs:srcset").substringBefore(" ").toNormalURL() - } - - return attributes().firstOrNull { - it.key.contains("src") && imageExtensions.any { ext -> it.value.contains(ext) } - }?.value?.substringBefore(" ")?.toNormalURL() -} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt deleted file mode 100644 index 87198dee1..000000000 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt +++ /dev/null @@ -1,11 +0,0 @@ -package eu.kanade.tachiyomi.extension.all.projectsuki - -import org.jsoup.nodes.Element - -data class PSBook( - val imgElement: Element, - val titleElement: Element, - val title: String, - val mangaID: String, - val url: NormalizedURL, -) diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt deleted file mode 100644 index dfa2d1646..000000000 
--- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt +++ /dev/null @@ -1,90 +0,0 @@ -@file:Suppress("CanSealedSubClassBeObject") - -package eu.kanade.tachiyomi.extension.all.projectsuki - -import eu.kanade.tachiyomi.source.model.Filter -import okhttp3.HttpUrl - -@Suppress("NOTHING_TO_INLINE") -object PSFilters { - internal sealed interface AutoFilter { - fun applyTo(builder: HttpUrl.Builder) - } - - private inline fun HttpUrl.Builder.setAdv() = setQueryParameter("adv", "1") - - class Author : Filter.Text("Author"), AutoFilter { - - override fun applyTo(builder: HttpUrl.Builder) { - when { - state.isNotBlank() -> builder.setAdv().addQueryParameter("author", state) - } - } - - companion object { - val ownHeader by lazy { Header("Cannot search by multiple authors") } - } - } - - class Artist : Filter.Text("Artist"), AutoFilter { - - override fun applyTo(builder: HttpUrl.Builder) { - when { - state.isNotBlank() -> builder.setAdv().addQueryParameter("artist", state) - } - } - - companion object { - val ownHeader by lazy { Header("Cannot search by multiple artists") } - } - } - - class Status : Filter.Select("Status", Value.values()), AutoFilter { - enum class Value(val display: String, val query: String) { - ANY("Any", ""), - ONGOING("Ongoing", "ongoing"), - COMPLETED("Completed", "completed"), - HIATUS("Hiatus", "hiatus"), - CANCELLED("Cancelled", "cancelled"), - ; - - override fun toString(): String = display - - companion object { - private val values: Array = values() - operator fun get(ordinal: Int) = values[ordinal] - } - } - - override fun applyTo(builder: HttpUrl.Builder) { - when (val state = Value[state]) { - Value.ANY -> {} // default, do nothing - else -> builder.setAdv().addQueryParameter("status", state.query) - } - } - } - - class Origin : Filter.Select("Origin", Value.values()), AutoFilter { - enum class Value(val display: String, val query: String?) 
{ - ANY("Any", null), - KOREA("Korea", "kr"), - CHINA("China", "cn"), - JAPAN("Japan", "jp"), - ; - - override fun toString(): String = display - - companion object { - private val values: Array = Value.values() - operator fun get(ordinal: Int) = values[ordinal] - } - } - - override fun applyTo(builder: HttpUrl.Builder) { - when (val state = Value[state]) { - Value.ANY -> {} // default, do nothing - else -> builder.setAdv().addQueryParameter("origin", state.query) - } - } - } -} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt new file mode 100644 index 000000000..734b821c7 --- /dev/null +++ b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt @@ -0,0 +1,84 @@ +package eu.kanade.tachiyomi.extension.all.projectsuki + +import okhttp3.HttpUrl + +/** + * @see EXTENSION_INFO Found in ProjectSuki.kt + */ +@Suppress("unused") +private inline val INFO: Nothing get() = error("INFO") + +/** + * Utility class made to help identify different urls. + * + * null regex means wildcard, matches anything. + * + * Meant to be used with [matchAgainst], will match against [HttpUrl.pathSegments] + * + * @author Federico d'Alonzo <me@npgx.dev> + */ +data class PathPattern(val paths: List) { + constructor(vararg paths: Regex?) : this(paths.asList()) + + init { + if (paths.isEmpty()) { + reportErrorToUser { + "Invalid PathPattern, cannot be empty!" + } + } + } +} + +/** + * Utility class to represent the [MatchResult]s obtained when matching a [PathPattern] + * against an [HttpUrl]. 
+ * + * When [matchResults] is null, it means the [HttpUrl] either: + * - when `allowSubPaths` in [matchAgainst] is `false`: [HttpUrl.pathSegments]`.size` != [PathPattern.paths]`.size` + * - when `allowSubPaths` in [matchAgainst] is `true`: [HttpUrl.pathSegments]`.size` < [PathPattern.paths]`.size` + * + * @see matchAgainst + * + * @author Federico d'Alonzo <me@npgx.dev> + */ +data class PathMatchResult(val doesMatch: Boolean, val matchResults: List?) { + operator fun get(name: String): MatchGroup? = matchResults?.firstNotNullOfOrNull { + it?.groups + // this throws if the group by "name" isn't found AND can return null too + ?.runCatching { get(name) } + ?.getOrNull() + } + + init { + if (matchResults?.isEmpty() == true) { + reportErrorToUser { + "Invalid PathMatchResult, matchResults must either be null or not empty!" + } + } + } +} + +/** + * @see PathPattern + * @see PathMatchResult + */ +fun HttpUrl.matchAgainst(pattern: PathPattern, allowSubPaths: Boolean = false, ignoreEmptySegments: Boolean = true): PathMatchResult { + val actualSegments: List = if (ignoreEmptySegments) pathSegments.filter { it.isNotBlank() } else pathSegments + val sizeReq = when (allowSubPaths) { + false -> actualSegments.size == pattern.paths.size + true -> actualSegments.size >= pattern.paths.size + } + + if (!sizeReq) return PathMatchResult(false, null) + + val matchResults: MutableList = ArrayList() + var matches = true + + actualSegments.zip(pattern.paths) { segment, regex -> + val match: MatchResult? 
= regex?.matchEntire(segment) + matchResults.add(match) + matches = matches && (regex == null || match != null) + } + + return PathMatchResult(matches, matchResults) +} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt index 0dbf62fc6..c824e5970 100644 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt +++ b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt @@ -1,15 +1,10 @@ package eu.kanade.tachiyomi.extension.all.projectsuki -import android.app.Application -import android.content.SharedPreferences -import androidx.preference.EditTextPreference import androidx.preference.PreferenceScreen -import eu.kanade.tachiyomi.lib.randomua.addRandomUAPreferenceToScreen import eu.kanade.tachiyomi.lib.randomua.getPrefCustomUA import eu.kanade.tachiyomi.lib.randomua.getPrefUAType import eu.kanade.tachiyomi.lib.randomua.setRandomUserAgent import eu.kanade.tachiyomi.network.GET -import eu.kanade.tachiyomi.network.POST import eu.kanade.tachiyomi.network.asObservableSuccess import eu.kanade.tachiyomi.network.interceptor.rateLimit import eu.kanade.tachiyomi.source.ConfigurableSource @@ -22,241 +17,441 @@ import eu.kanade.tachiyomi.source.model.SManga import eu.kanade.tachiyomi.source.model.UpdateStrategy import eu.kanade.tachiyomi.source.online.HttpSource import eu.kanade.tachiyomi.util.asJsoup -import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json -import kotlinx.serialization.json.jsonObject -import kotlinx.serialization.json.jsonPrimitive import okhttp3.HttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl -import okhttp3.MediaType.Companion.toMediaType +import okhttp3.HttpUrl.Companion.toHttpUrlOrNull import okhttp3.OkHttpClient import okhttp3.Request -import okhttp3.RequestBody.Companion.toRequestBody import okhttp3.Response -import 
org.jsoup.Jsoup -import org.jsoup.nodes.Element +import org.jsoup.nodes.Document import rx.Observable -import uy.kohesive.injekt.Injekt -import uy.kohesive.injekt.api.get +import java.net.URI import java.util.Locale +import java.util.concurrent.TimeUnit +import kotlin.math.floor +import kotlin.math.log10 +import kotlin.math.pow +/** + * [Project Suki](https://projectsuki.com) + * [Tachiyomi](https://github.com/tachiyomiorg/tachiyomi) + * [extension](https://github.com/tachiyomiorg/tachiyomi-extensions) + * + * Most of the code should be documented, `@author` KDoc tags are mostly to know + * who to bother *when necessary*. + * If you contributed to this extension, be sure to add yourself in an `@author` tag! + * + * If you want to understand how this extension works, + * I recommend first looking at [ProjectSuki], then [DataExtractor], + * then the rest of the project. + */ +internal inline val EXTENSION_INFO: Nothing get() = error("EXTENSION_INFO") + +internal const val SHORT_FORM_ID: String = """ps""" + +internal val homepageUrl: HttpUrl = "https://projectsuki.com".toHttpUrl() +internal val homepageUri: URI = homepageUrl.toUri() + +/** PATTERN: `https://projectsuki.com/book/` */ +internal val bookUrlPattern = PathPattern( + """book""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/browse/` */ +@Suppress("unused") +internal val browsePattern = PathPattern( + """browse""".toRegex(RegexOption.IGNORE_CASE), + """(?\d+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** + * PATTERN: `https://projectsuki.com/read///` + * + * `` is actually a filter of sorts that will remove pages < ``'s value. 
+ */ +internal val chapterUrlPattern = PathPattern( + """read""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** + * PATTERNS: + * - `https://projectsuki.com/images/gallery//thumb` + * - `https://projectsuki.com/images/gallery//thumb.` + * - `https://projectsuki.com/images/gallery//-thumb` + * - `https://projectsuki.com/images/gallery//-thumb.` + */ +internal val thumbnailUrlPattern = PathPattern( + """images""".toRegex(RegexOption.IGNORE_CASE), + """gallery""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?\d+-)?thumb(?:\.(?.+))?""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/images/gallery///` */ +internal val pageUrlPattern = PathPattern( + """images""".toRegex(RegexOption.IGNORE_CASE), + """gallery""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/genre/` */ +internal val genreSearchUrlPattern = PathPattern( + """genre""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/group/` */ +@Suppress("unused") +internal val groupUrlPattern = PathPattern( + """group""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** + * Used on the website when there's an image loading error, could be used in extension. 
+ */ +@Suppress("unused") +internal val emptyImageUrl: HttpUrl = homepageUrl.newBuilder() + .addPathSegment("images") + .addPathSegment("gallery") + .addPathSegment("empty.jpg") + .build() + +/** + * Removes the [URL's](https://en.wikipedia.org/wiki/URL) host and scheme/protocol, + * leaving only the path, query and fragment, *without leading `/`* + * + * @see URI.relativize + */ +internal val HttpUrl.rawRelative: String? + get() { + val uri = toUri() + val relative = homepageUri.relativize(uri) + return when { + uri === relative -> null + else -> relative.toASCIIString() + } + } + +internal val reportPrefix: String + get() = """Error! Report on GitHub (tachiyomiorg/tachiyomi-extensions)""" + +/** Just throw an [error], which will get caught by Tachiyomi: the message will be exposed as a [toast][android.widget.Toast]. */ +internal inline fun reportErrorToUser(message: () -> String): Nothing { + error("""$reportPrefix: ${message()}""") +} + +/** Used when chapters don't have a [Language][DataExtractor.ChaptersTableColumnDataType.Language] column (if that ever happens). */ +internal const val UNKNOWN_LANGUAGE: String = "unknown" + +/** + * Actual Tachiyomi extension, ties everything together. + * + * Most of the work happens in [DataExtractor], [ProjectSukiAPI], [ProjectSukiFilters] and [ProjectSukiPreferences]. 
+ * + * @author Federico d'Alonzo <me@npgx.dev> + */ @Suppress("unused") class ProjectSuki : HttpSource(), ConfigurableSource { + override val name: String = "Project Suki" - override val baseUrl: String = "https://projectsuki.com" - override val lang: String = "en" + override val baseUrl: String = homepageUri.toASCIIString() + override val lang: String = "all" + override val id: Long = 8965918600406781666L - private val preferences: SharedPreferences by lazy { - Injekt.get().getSharedPreferences("source_$id", 0x0000) + /** Handles extension preferences found in Extensions > Project Suki > Gear icon */ + private val preferences = ProjectSukiPreferences(id) + + /** See [Kotlinx-Serialization](https://github.com/Kotlin/kotlinx.serialization). */ + private val json: Json = Json { + ignoreUnknownKeys = true + explicitNulls = true + encodeDefaults = true } - private fun String.processLangPref(): List = split(",").map { it.trim().lowercase(Locale.US) } - - private val SharedPreferences.whitelistedLanguages: List - get() = getString(PS.PREFERENCE_WHITELIST_LANGUAGES, "")!! - .processLangPref() - - private val SharedPreferences.blacklistedLanguages: List - get() = getString(PS.PREFERENCE_BLACKLIST_LANGUAGES, "")!! - .processLangPref() - override fun setupPreferenceScreen(screen: PreferenceScreen) { - addRandomUAPreferenceToScreen(screen) + with(preferences) { screen.configure() } + } - screen.addPreference( - EditTextPreference(screen.context).apply { - key = PS.PREFERENCE_WHITELIST_LANGUAGES - title = PS.PREFERENCE_WHITELIST_LANGUAGES_TITLE - summary = PS.PREFERENCE_WHITELIST_LANGUAGES_SUMMARY - }, + /** + * [OkHttp's](https://square.github.io/okhttp/) [OkHttpClient] that handles network requests and responses. 
+ * + * Thanks to Tachiyomi's [NetworkHelper](https://github.com/tachiyomiorg/tachiyomi/blob/58daedc89ee18d04e7af5bab12629680dba4096c/core/src/main/java/eu/kanade/tachiyomi/network/NetworkHelper.kt#L21C12-L21C12) + * (this is a permalink, check for updated version), + * most client options are already set as they should be, including the [Cache][okhttp3.Cache]. + */ + override val client: OkHttpClient = network.client.newBuilder() + .setRandomUserAgent( + userAgentType = preferences.shared.getPrefUAType(), + customUA = preferences.shared.getPrefCustomUA(), ) + .rateLimit(2, 1, TimeUnit.SECONDS) + .build() - screen.addPreference( - EditTextPreference(screen.context).apply { - key = PS.PREFERENCE_BLACKLIST_LANGUAGES - title = PS.PREFERENCE_BLACKLIST_LANGUAGES_TITLE - summary = PS.PREFERENCE_BLACKLIST_LANGUAGES_SUMMARY - }, + /** + * Specify what request will be sent to the server. + * + * This specific method returns a [GET](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) + * request to be sent to [https://projectsuki.com/browse](https://projectsuki.com/browse). + * + * Using the default [HttpSource]'s [Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers). + */ + override fun popularMangaRequest(page: Int) = GET( + homepageUrl.newBuilder() + .addPathSegment("browse") + .addPathSegment((page - 1).toString()) // starts at 0 + .build(), + headers, + ) + + /** Whether or not this extension supports the "Latest" tab. */ + override val supportsLatest: Boolean get() = true + + /** Same concept as [popularMangaRequest], but is sent to [https://projectsuki.com/](https://projectsuki.com/). */ + override fun latestUpdatesRequest(page: Int) = GET(homepageUrl, headers) + + /** + * Utility to find and apply a filter specified by [T], + * see [reified](https://kotlinlang.org/docs/inline-functions.html#reified-type-parameters) + * if you're not familiar with the concept. 
+ */ + private inline fun HttpUrl.Builder.applyPSFilter( + from: FilterList, + ): HttpUrl.Builder where T : Filter<*>, T : ProjectSukiFilters.ProjectSukiFilter = apply { + from.firstNotNullOfOrNull { it as? T }?.run { applyFilter() } + } + + /** + * Same concept as [popularMangaRequest], but is sent to [https://projectsuki.com/search](https://projectsuki.com/search). + * This is the [Full-Site][ProjectSukiFilters.SearchMode.FULL_SITE] variant of search, it *will* return results that have no chapters. + */ + override fun searchMangaRequest(page: Int, query: String, filters: FilterList): Request { + return GET( + homepageUrl.newBuilder() + .addPathSegment("search") + .addQueryParameter("page", (page - 1).toString()) + .addQueryParameter("q", query) + .applyPSFilter(from = filters) + .applyPSFilter(from = filters) + .applyPSFilter(from = filters) + .applyPSFilter(from = filters) + .build(), + headers, ) } - override val client: OkHttpClient = network.cloudflareClient.newBuilder() - .setRandomUserAgent( - userAgentType = preferences.getPrefUAType(), - customUA = preferences.getPrefCustomUA(), - filterInclude = listOf("chrome"), - ) - .rateLimit(4) - .build() - - override fun popularMangaRequest(page: Int) = GET(baseUrl, headers) - - // differentiating between popular and latest manga in the main page is - // *theoretically possible* but a pain, as such, this is fine "for now" + /** + * Handles the server's [Response] that was returned from [popularMangaRequest]'s [Request]. + * + * Because we asked the server for a webpage, it will return, in the [Request's body][okhttp3.RequestBody], + * the [html](https://developer.mozilla.org/en-US/docs/Web/HTML) that makes up that page, + * including any [css](https://developer.mozilla.org/en-US/docs/Web/CSS) and + * [JavaScript](https://developer.mozilla.org/en-US/docs/Web/JavaScript) in `