diff --git a/src/all/projectsuki/AndroidManifest.xml b/src/all/projectsuki/AndroidManifest.xml index 867eb056f..6a117da70 100644 --- a/src/all/projectsuki/AndroidManifest.xml +++ b/src/all/projectsuki/AndroidManifest.xml @@ -2,33 +2,32 @@ + + + + - - + + + + - - + + - - - - + + + diff --git a/src/all/projectsuki/CHANGELOG.md b/src/all/projectsuki/CHANGELOG.md new file mode 100644 index 000000000..b36c92efd --- /dev/null +++ b/src/all/projectsuki/CHANGELOG.md @@ -0,0 +1,20 @@ +## Version 1.4.2 + +- Improved search feature +- New and improved Popular tab +- Old Popular tab moved to Latest +- Fixed chapter numbering issues when "Chapter" wasn't explicitly present (e.g. "Ch. 2") +- Added chapter number inference for when the above fails +- Improved user feedback for errors and issues +- Fixed wording and clarity on most descriptions +- Added simple search option for Android API < 24 +- Chapter language will now appear to the right of the scan group +- Enhanced chapter sorting (number > group > language) +- Changed extension language from English to Multi + +## Version 1.4.1 + +First version of the extension: + +- basic functionality +- basic search, limited to full-site \ No newline at end of file diff --git a/src/all/projectsuki/README.md b/src/all/projectsuki/README.md new file mode 100644 index 000000000..5d44d2754 --- /dev/null +++ b/src/all/projectsuki/README.md @@ -0,0 +1,9 @@ +# Project Suki + +Go check out our general FAQs and Guides over at +[Extension FAQ](https://tachiyomi.org/help/faq/#extensions) or +[Getting Started](https://tachiyomi.org/help/guides/getting-started/#installation). + +If you still can't find the answer you're looking for, you're welcome to open an +[issue](https://github.com/tachiyomiorg/tachiyomi-extensions/issues) +and mention [me](https://github.com/npgx/) *in the issue*. 
diff --git a/src/all/projectsuki/build.gradle b/src/all/projectsuki/build.gradle index 5ea5dc348..def93d091 100644 --- a/src/all/projectsuki/build.gradle +++ b/src/all/projectsuki/build.gradle @@ -1,15 +1,16 @@ apply plugin: 'com.android.application' apply plugin: 'kotlin-android' +apply plugin: 'kotlinx-serialization' ext { extName = 'Project Suki' pkgNameSuffix = 'all.projectsuki' extClass = '.ProjectSuki' - extVersionCode = 1 + extVersionCode = 2 } dependencies { - implementation(project(":lib-randomua")) + implementation project(":lib-randomua") } apply from: "$rootDir/common.gradle" diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt new file mode 100644 index 000000000..f9b775a70 --- /dev/null +++ b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/DataExtractor.kt @@ -0,0 +1,902 @@ +package eu.kanade.tachiyomi.extension.all.projectsuki + +import okhttp3.HttpUrl +import okhttp3.HttpUrl.Companion.toHttpUrl +import okhttp3.HttpUrl.Companion.toHttpUrlOrNull +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element +import org.jsoup.select.Elements +import java.text.SimpleDateFormat +import java.util.Calendar +import java.util.Date +import java.util.EnumMap +import java.util.Locale +import java.util.TimeZone + +/** + * @see EXTENSION_INFO Found in ProjectSuki.kt + */ +@Suppress("unused") +private inline val INFO: Nothing get() = error("INFO") + +internal typealias BookID = String +internal typealias ChapterID = String +internal typealias ScanGroup = String + +/** + * Gets the thumbnail image for a particular [bookID], [extension] if needed and [size]. + * + * Not all URLs produced by this function might point to a valid asset. + */ +internal fun bookThumbnailUrl(bookID: BookID, extension: String, size: UInt? 
= null): HttpUrl { + return homepageUrl.newBuilder() + .addPathSegment("images") + .addPathSegment("gallery") + .addPathSegment(bookID) + .addPathSegment( + when { + size == null && extension.isBlank() -> "thumb" + size == null -> "thumb.$extension" + extension.isBlank() -> "$size-thumb" + else -> "$size-thumb.$extension" + }, + ) + .build() +} + +/** + * Finds the closest common parent between 2 or more [elements]. + * + * If all [elements] are the same element, it will return the element itself. + * + * Returns null if the [elements] are not in the same [Document]. + */ +internal fun commonParent(vararg elements: Element): Element? { + require(elements.size > 1) { "elements must have more than 1 element" } + + val parents: List> = elements.map { it.parents().reversed().iterator() } + var lastCommon: Element? = null + + while (true) { + val layer: MutableSet = parents.mapTo(HashSet()) { + if (it.hasNext()) it.next() else null + } + if (null in layer) break + if (layer.size != 1) break + lastCommon = layer.single() + } + + return lastCommon +} + +/** + * Simple Utility class that represents a switching point between 2 patterns given by a certain predicate (see [switchingPoints]). + * + * For example in the sequence 111001 there are 2 switching points, + * the first one is 10, at indexes 2 and 3, + * and the second one is 01 at indexes 4 and 5. + * + * Both indexes and states are given for absolute clarity. + */ +internal data class SwitchingPoint(val left: Int, val right: Int, val leftState: Boolean, val rightState: Boolean) { + init { + if (left + 1 != right) { + reportErrorToUser { + "invalid SwitchingPoint: ($left, $right)" + } + } + if (leftState == rightState) { + reportErrorToUser { + "invalid SwitchingPoint: ($leftState, $rightState)" + } + } + } +} + +/** + * Function that will return all [SwitchingPoint]s in a certain sequence. 
+ */ +internal fun Iterable.switchingPoints(predicate: (E) -> Boolean): List { + val iterator = iterator() + if (!iterator.hasNext()) return emptyList() + + val points: MutableList = ArrayList() + var state: Boolean = predicate(iterator.next()) + var index = 1 + for (element in iterator) { + val p = predicate(element) + if (state != p) { + points.add(SwitchingPoint(left = index - 1, right = index, leftState = state, rightState = p)) + state = p + } + index++ + } + + return points +} + +/** + * Utility class that can extract and format data from a certain [extractionElement]. + * + * Note that a [Document] is also an [Element]. + * + * The given [extractionElement] must have an [ownerDocument][Element.ownerDocument] with a valid absolute + * [location][Document.location] (according to [toHttpUrl]). + * + * [Lazy] properties are used to allow for the extraction process to happen only once + * (and for thread safety, see [LazyThreadSafetyMode], [lazy]). + * + * @author Federico d'Alonzo <me@npgx.dev> + */ +@Suppress("MemberVisibilityCanBePrivate") +class DataExtractor(val extractionElement: Element) { + + private val url: HttpUrl = extractionElement.ownerDocument()?.location()?.toHttpUrlOrNull() ?: reportErrorToUser { + buildString { + append("DataExtractor class requires a \"from\" element ") + append("that possesses an owner document with a valid absolute location(), but ") + append(extractionElement.ownerDocument()?.location()) + append(" was found!") + } + } + + /** + * All [anchor](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a) tags + * that have a valid url in the [href](https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/href) + * [attribute](https://developer.mozilla.org/en-US/docs/Glossary/Attribute). 
+ * + * To understand the [Element.select] methods, see [CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_selectors) + * and how to use them [to select DOM elements](https://developer.mozilla.org/en-US/docs/Web/API/Document_object_model/Locating_DOM_elements_using_selectors). + * + * JSoup's [Element.attr] methods support the special `abs:` syntax when working with relative URLs. + * It is simply a shortcut to [Element.absUrl], which uses [Document.baseUri]. + */ + val allHrefAnchors: Map by lazy { + buildMap { + extractionElement.select("a[href]").forEach { a -> + val href = a.attr("abs:href") + if (href.isNotBlank()) { + href.toHttpUrlOrNull() + ?.let { this[a] = it } + } + } + } + } + + /** + * Filters [allHrefAnchors] for urls that satisfy `url.host.endsWith(homepageUrl.host)`. + * + * Meaning this property contains only elements that redirect to a Project Suki URL. + */ + val psHrefAnchors: Map by lazy { + allHrefAnchors.filterValues { url -> + url.host.endsWith(homepageUrl.host) + } + } + + /** Utility class that represents a "book" element, identified by the [bookID]. */ + data class PSBook(val thumbnail: HttpUrl, val rawTitle: String, val bookUrl: HttpUrl, val bookID: BookID) { + override fun equals(other: Any?) = other is PSBook && this.bookID == other.bookID + override fun hashCode() = bookID.hashCode() + } + + /** + * This property contains all the [books][PSBook] contained in the [extractionElement]. + * + * Extraction is done by first obtaining all [psHrefAnchors], and using some heuristics + * to find the [PSBook.rawTitle] and [PSBook.thumbnail]'s extension. + * + * Other extensions might use CSS Selectors (see [DataExtractor]) to find these values in a fixed structure. + * But because [Project Suki](https://projectsuki.com) seems to be done by hand using [Bootstrap](https://getbootstrap.com/), + * it has a much more volatile structure. 
+ * + * To make it possible to maintain this extension, data extraction is done by finding all elements in the page that redirect to + * book entries, and using generalized heuristics that should be robust to some types of changes. + * This has the disadvantage of making distinguishing between the different elements in a single page a nightmare, + * but luckily we don't need to do that for the purposes of a Tachiyomi extension. + */ + val books: Set by lazy { + buildSet { + data class BookUrlContainerElement(val container: Element, val href: HttpUrl, val matchResult: PathMatchResult) + + psHrefAnchors.entries + .map { (element, href) -> BookUrlContainerElement(element, href, href.matchAgainst(bookUrlPattern)) } + .filter { it.matchResult.doesMatch } + .groupBy { it.matchResult["bookid"]!!.value } + .forEach { (bookID: BookID, containers: List) -> + + val extension: String = containers.asSequence() + .flatMap { it.container.select("img") } + .mapNotNull { it.imageSrc() } + .map { it.matchAgainst(thumbnailUrlPattern) } + .filter { it.doesMatch } + .firstOrNull() + ?.get("thumbextension") + ?.value ?: "" + + val title: String = containers.asSequence() + .map { it.container } + .filter { it.select("img").isEmpty() } + .filter { it.parents().none { p -> p.tag().normalName() == "small" } } + .map { it.ownText() } + .filter { !it.equals("show more", ignoreCase = true) } + .firstOrNull() ?: reportErrorToUser { "Could not determine title for $bookID" } + + add( + PSBook( + thumbnail = bookThumbnailUrl(bookID, extension), + rawTitle = title, + bookUrl = homepageUrl.newBuilder() + .addPathSegment("book") + .addPathSegment(bookID) + .build(), + bookID = bookID, + ), + ) + } + } + } + + /** Utility class that extends [PSBook], by providing a [detailsTable], [alertData] and [description]. */ + data class PSBookDetails( + val book: PSBook, + val detailsTable: EnumMap, + val alertData: List, + val description: String, + ) { + override fun equals(other: Any?) 
= other is PSBookDetails && this.book == other.book + override fun hashCode() = book.hashCode() + } + + /** + * Represents a plethora of possibly-present data about some book. + * + * The process for extracting the details is described in the KDoc for [bookDetails]. + */ + @Suppress("RegExpUnnecessaryNonCapturingGroup") + enum class BookDetail(val display: String, val regex: Regex, val elementProcessor: (Element) -> String = { it.text() }) { + ALT_TITLE("Alt titles:", """(?:alternative|alt\.?) titles?:?""".toRegex(RegexOption.IGNORE_CASE)), + AUTHOR("Authors:", """authors?:?""".toRegex(RegexOption.IGNORE_CASE)), + ARTIST("Artists:", """artists?:?""".toRegex(RegexOption.IGNORE_CASE)), + STATUS("Status:", """status:?""".toRegex(RegexOption.IGNORE_CASE)), + ORIGIN("Origin:", """origin:?""".toRegex(RegexOption.IGNORE_CASE)), + RELEASE_YEAR("Release year:", """release(?: year):?""".toRegex(RegexOption.IGNORE_CASE)), + USER_RATING( + "User rating:", + """user ratings?:?""".toRegex(RegexOption.IGNORE_CASE), + elementProcessor = { ratings -> + val rates = when { + ratings.id() != "ratings" -> 0 + else -> ratings.children().count { it.hasClass("text-warning") } + } + + when (rates) { + in 1..5 -> "$rates/5" + else -> "?/5" + } + }, + ), + VIEWS("Views:", """views?:?""".toRegex(RegexOption.IGNORE_CASE)), + OFFICIAL("Official:", """official:?""".toRegex(RegexOption.IGNORE_CASE)), + PURCHASE("Purchase:", """purchase:?""".toRegex(RegexOption.IGNORE_CASE)), + GENRE("Genres:", """genre(?:\(s\))?:?""".toRegex(RegexOption.IGNORE_CASE)), + ; + + companion object { + private val values = values().toList() + fun from(type: String): BookDetail? = values.firstOrNull { it.regex.matches(type) } + } + } + + /** Used to detect visible/invisible alerts. */ + private val displayNoneRegex = """display: ?none;?""".toRegex(RegexOption.IGNORE_CASE) + + /** + * All [details][PSBookDetails] are extracted from a table-like list of `
` elements, + * found in the book main page, using generalized heuristics: + * + * First the algorithm looks for known entries in the "table" by looking for + * the [Status][BookDetail.STATUS] and [Origin][BookDetail.ORIGIN] fields. + * This is possible because these elements redirect to the [search](https://projectsuki.com/search) + * page with "status" and "origin" queries. + * + * The [commonParent] between the two elements is found and the table is subsequently analyzed. + * If this method fails, at least the [Author][BookDetail.AUTHOR], [Artist][BookDetail.ARTIST] and [Genre][BookDetail.GENRE] + * details are found via URLs. + * + * An extra [Genre][BookDetail.GENRE] is added when possible: + * - Origin: "kr" -> Genre: "Manhwa" + * - Origin: "cn" -> Genre: "Manhua" + * - Origin: "jp" -> Genre: "Manga" + * + * The book title, description and alerts are also found in similar ways. + * + * The description is expanded with all this information too. + */ + val bookDetails: PSBookDetails by lazy { + val match = url.matchAgainst(bookUrlPattern) + if (!match.doesMatch) reportErrorToUser { "cannot extract book details: $url" } + val bookID = match["bookid"]!!.value + + val authors: Map = psHrefAnchors.filter { (_, url) -> + url.queryParameterNames.contains("author") + } + + val artists: Map = psHrefAnchors.filter { (_, url) -> + url.queryParameterNames.contains("artist") + } + + val status: Map.Entry = psHrefAnchors.entries.single { (_, url) -> + url.queryParameterNames.contains("status") + } + + val origin: Map.Entry = psHrefAnchors.entries.single { (_, url) -> + url.queryParameterNames.contains("origin") + } + + val genres: Map = psHrefAnchors.filter { (_, url) -> + url.matchAgainst(genreSearchUrlPattern).doesMatch + } + + val details = EnumMap(BookDetail::class.java) + val tableParent: Element? = commonParent(status.key, origin.key) + val rows: List? 
= tableParent?.children()?.toList() + + for (row in (rows ?: emptyList())) { + val cols = row.children() + val typeElement = cols.getOrNull(0) ?: continue + val valueElement = cols.getOrNull(1) ?: continue + + val typeText = typeElement.text() + val detail = BookDetail.from(typeText) ?: continue + + details[detail] = detail.elementProcessor(valueElement) + } + + details.getOrPut(BookDetail.AUTHOR) { authors.keys.joinToString(", ") { it.text() } } + details.getOrPut(BookDetail.ARTIST) { artists.keys.joinToString(", ") { it.text() } } + details.getOrPut(BookDetail.STATUS) { status.key.text() } + details.getOrPut(BookDetail.ORIGIN) { origin.key.text() } + + details.getOrPut(BookDetail.GENRE) { genres.keys.joinToString(", ") { it.text() } } + + when (origin.value.queryParameter("origin")) { + "kr" -> "Manhwa" + "cn" -> "Manhua" + "jp" -> "Manga" + else -> null + }?.let { originGenre -> + details[BookDetail.GENRE] = """${details[BookDetail.GENRE]}, $originGenre""" + } + + val title: Element? 
= extractionElement.selectFirst("h2[itemprop=title]") ?: extractionElement.selectFirst("h2") ?: run { + // the common table is inside of a "row" wrapper that is the neighbour of the h2 containing the title + // if we sort of generalize this, the title should be the first + // text-node-bearing child of the table's grandparent + tableParent?.parent()?.parent()?.children()?.firstOrNull { it.textNodes().isNotEmpty() } + } + + val alerts: List = extractionElement.select(".alert, .alert-info") + .asSequence() + .filter { !it.attr("style").contains(displayNoneRegex) } + .filter { alert -> alert.parents().none { it.attr("style").contains(displayNoneRegex) } } + .map { alert -> + buildString { + var appendedSomething = false + alert.select("h4").singleOrNull()?.let { + appendLine(it.wholeText()) + appendedSomething = true + } + alert.select("p").singleOrNull()?.let { + appendLine(it.wholeText()) + appendedSomething = true + } + if (!appendedSomething) { + appendLine(alert.wholeText()) + } + } + } + .toList() + + val description = extractionElement.selectFirst("#descriptionCollapse") + ?.wholeText() ?: extractionElement.select(".description") + .joinToString("\n\n", postfix = "\n") { it.wholeText() } + + val extension = extractionElement.select("img") + .asSequence() + .mapNotNull { e -> e.imageSrc()?.let { e to it } } + .map { (img, src) -> img to src.matchAgainst(thumbnailUrlPattern) } + .filter { (_, match) -> match.doesMatch } + .firstOrNull() + ?.second + ?.get("thumbextension") + ?.value ?: "" + + PSBookDetails( + book = PSBook( + bookThumbnailUrl(bookID, extension), + title?.text() ?: reportErrorToUser { "could not determine book title from details for $bookID" }, + url, + bookID, + ), + detailsTable = details, + alertData = alerts, + description = description, + ) + } + + /** Represents some data type that a certain column in the chapters table represents. 
*/ + sealed class ChaptersTableColumnDataType(val required: Boolean) { + + /** @return true if this data type is represented by a column's raw title. */ + abstract fun isRepresentedBy(from: String): Boolean + + /** Represents the chapter's title, which also normally includes the chapter number. */ + /*data*/ object Chapter : ChaptersTableColumnDataType(required = true) { + private val chapterHeaderRegex = """chapters?""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(chapterHeaderRegex) + } + + /** Represents the chapter's scan group. */ + /*data*/ object Group : ChaptersTableColumnDataType(required = true) { + private val groupHeaderRegex = """groups?""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(groupHeaderRegex) + } + + /** Represents the chapter's release date (when it was added to the site). */ + /*data*/ object Added : ChaptersTableColumnDataType(required = true) { + private val dateHeaderRegex = """added|date""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(dateHeaderRegex) + } + + /** Represents the chapter's language. */ + /*data*/ object Language : ChaptersTableColumnDataType(required = false) { + private val languageHeaderRegex = """language""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(languageHeaderRegex) + } + + /** Represents the chapter's view count. 
*/ + /*data*/ object Views : ChaptersTableColumnDataType(required = false) { + @Suppress("RegExpUnnecessaryNonCapturingGroup") + private val languageHeaderRegex = """views?(?:\s*count)?""".toRegex(RegexOption.IGNORE_CASE) + override fun isRepresentedBy(from: String): Boolean = from.matches(languageHeaderRegex) + } + + companion object { + val all: Set by lazy { setOf(Chapter, Group, Added, Language, Views) } + val required: Set by lazy { all.filterTo(LinkedHashSet()) { it.required } } + + /** + * Takes the list of [headers] and returns a map that + * represents which data type is contained in which column index. + * + * Not all column indexes might be present if some column isn't recognised as a data type listed above. + */ + fun extractDataTypes(headers: List): Map { + return buildMap { + headers.map { it.text() } + .forEachIndexed { columnIndex, columnHeaderText -> + all.forEach { dataType -> + if (dataType.isRepresentedBy(columnHeaderText)) { + put(dataType, columnIndex) + } + } + } + } + } + } + } + + /** Represents a book's chapter. */ + data class BookChapter( + val chapterUrl: HttpUrl, + val chapterMatchResult: PathMatchResult, + val chapterTitle: String, + val chapterNumber: ChapterNumber?, + val chapterGroup: ScanGroup, + val chapterDateAdded: Date?, + val chapterLanguage: String, + ) { + + @Suppress("unused") + val bookID: BookID = chapterMatchResult["bookid"]!!.value + + @Suppress("unused") + val chapterID: ChapterID = chapterMatchResult["chapterid"]!!.value + } + + /** + * This property contains all the [BookChapter]s contained in the [extractionElement], grouped by the [ScanGroup]. + * + * The extraction proceeds by first finding all `` elements and then progressively refines + * the extracted data to remove false positives, combining all the extracted data and removing duplicates at the end. + * + * The `` element is analyzed to find the corresponding data types, this is resistant to shuffles + * (e.g. 
if the Chapter and Language columns are swapped, this will work anyways). + * + * Then the `` rows (``) are one by one processed to find the ones that match the column (`
`) + * size and data type positions that we care about. + */ + val bookChapters: Map> by lazy { + data class RawTable(val self: Element, val thead: Element, val tbody: Element) + data class AnalyzedTable(val raw: RawTable, val columnDataTypes: Map, val dataRows: List) + + val allChaptersByGroup: MutableMap> = extractionElement.select("table") + .asSequence() + .mapNotNull { tableElement -> + tableElement.selectFirst("thead")?.let { thead -> + tableElement.selectFirst("tbody")?.let { tbody -> + RawTable(tableElement, thead, tbody) + } + } + } + .mapNotNull { rawTable -> + val (_: Element, theadElement: Element, tbodyElement: Element) = rawTable + + val columnDataTypes: Map = theadElement.select("tr").asSequence() + .mapNotNull { headerRow -> + ChaptersTableColumnDataType.extractDataTypes(headers = headerRow.select("td")) + .takeIf { it.keys.containsAll(ChaptersTableColumnDataType.required) } + } + .firstOrNull() ?: return@mapNotNull null + + val dataRows: List = tbodyElement.select("tr") + .asSequence() + .map { it.children() } + .filter { it.size == columnDataTypes.size } + .toList() + + AnalyzedTable(rawTable, columnDataTypes, dataRows) + } + .map { analyzedTable -> + val (_: RawTable, columnDataTypes: Map, dataRows: List) = analyzedTable + + val rawData: List> = dataRows.map { row -> + columnDataTypes.mapValues { (_, columnIndex) -> + row[columnIndex] + } + } + + val rawByGroup: Map>> = rawData.groupBy { data -> + data[ChaptersTableColumnDataType.Group]!!.text() + } + + val chaptersByGroup: Map> = rawByGroup.mapValues { (groupName, chapters: List>) -> + chapters.map { data: Map -> + val chapterElement: Element = data[ChaptersTableColumnDataType.Chapter]!! + val addedElement: Element = data[ChaptersTableColumnDataType.Added]!! + val languageElement: Element? 
= data[ChaptersTableColumnDataType.Language] + // val viewsElement = data[ChaptersTableColumnDataType.Views] + + val chapterUrl: HttpUrl = (chapterElement.selectFirst("a[href]") ?: reportErrorToUser { "Could not determine chapter url for ${chapterElement.text()}" }) + .attr("abs:href") + .toHttpUrl() + val chapterUrlMatch: PathMatchResult = chapterUrl.matchAgainst(chapterUrlPattern) + + val chapterNumber: ChapterNumber? = chapterElement.text().tryAnalyzeChapterNumber() + val dateAdded: Date? = addedElement.text().tryAnalyzeChapterDate() + val chapterLanguage: String = languageElement?.text()?.trim()?.lowercase(Locale.US) ?: UNKNOWN_LANGUAGE + + BookChapter( + chapterUrl = chapterUrl, + chapterMatchResult = chapterUrlMatch, + chapterTitle = chapterElement.text(), + chapterNumber = chapterNumber, + chapterGroup = groupName, + chapterDateAdded = dateAdded, + chapterLanguage = chapterLanguage, + ) + } + } + + chaptersByGroup + } + .map { chaptersByGroup -> + chaptersByGroup.mapValues { (_, chapters) -> + chapters.tryInferMissingChapterNumbers() + } + } + .fold(LinkedHashMap()) { map, next -> + map.apply { + next.forEach { (group, chapters) -> + getOrPut(group) { ArrayList() }.addAll(chapters) + } + } + } + + allChaptersByGroup + } + + /** + * Utility class that represents a chapter number. + * + * Ordering is implemented in the way a human would most likely expect chapters to be ordered, + * e.g. chapter 10.15 comes after chapter 10.9 + */ + data class ChapterNumber(val main: UInt, val sub: UInt) : Comparable { + override fun compareTo(other: ChapterNumber): Int = comparator.compare(this, other) + + companion object { + val comparator: Comparator by lazy { compareBy({ it.main }, { it.sub }) } + val chapterNumberRegex: Regex = """(?:chapter|ch\.?)\s*(\d+)(?:\s*[.,-]\s*(\d+)?)?""".toRegex(RegexOption.IGNORE_CASE) + } + } + + /** Tries to infer the chapter number from the raw title. */ + private fun String.tryAnalyzeChapterNumber(): ChapterNumber? 
{ + return ChapterNumber.chapterNumberRegex + .find(this) + ?.let { simpleMatchResult -> + val main: UInt = simpleMatchResult.groupValues[1].toUInt() + val sub: UInt = simpleMatchResult.groupValues[2].takeIf { it.isNotBlank() }?.toUInt() ?: 0u + + ChapterNumber(main, sub) + } + } + + /** + * Represents an index where the chapter number is unknown and + * whether or not the previous (above, next numerical chapter) + * or next (below, previous numerical chapter) chapter numbers + * are known. + * + * Requires [aboveIsKnown] or [belowIsKnown] to be true (or both). + */ + data class MissingChapterNumberEdge(val index: Int, val aboveIsKnown: Boolean, val belowIsKnown: Boolean) { + init { + require(aboveIsKnown || belowIsKnown) { "previous or next index must be known (or both)" } + } + } + + /** + * Chapter titles usually contain "Chapter xx" or "Ch. xx", but to provide some way to patch + * eventual holes (which happened before with "Ch." which wasn't accounted for), this method is provided. + * + * The algorithm tries to infer the chapter numbers by using correctly + * inferred zones and expanding them. + * + * The theoretical behaviour of this algorithm can easily be represented by + * using + for known and - for unknown chapter numbers + * (think of a 1D cellular automaton with very simple rules). + * An example (coarse) timeline could look like this: + * ``` + * -++--++---+-+++-- + * ++++++++-+++++++- + * +++++++++++++++++ + * ``` + * The actual changes always happen in a loop-like behaviour from left to right. + * We can use this to our advantage. + * + * Inference is done on a best-guess basis based on neighbouring values. + * Reporting to the user is preferred to avoid providing weird values. 
+ */ + private fun List.tryInferMissingChapterNumbers(): List { + if (isEmpty()) return emptyList() + + val switchingPoints: List = switchingPoints { it.chapterNumber != null } + val missingChapterNumberEdges: ArrayDeque = ArrayDeque() + + when { + switchingPoints.isEmpty() && first().chapterNumber == null -> { + // oh dear, nothing is known + reportErrorToUser { "No chapter numbers could be inferred!" } + } + + switchingPoints.isEmpty() /* && first().chapterNumber != null */ -> { + // all are known + return this + } + } + + // convert switching points into an easier-to-handle format + switchingPoints.forEach { (left, right, leftIsKnown, rightIsKnown) -> + when { + leftIsKnown && !rightIsKnown -> { + // going from known to unknown in top to bottom direction + // chapters go in inverse order, so top is last, bottom is first + // left is top, right is bottom. + // subject of discussion is the right one (the unknown). + // this is the simpler case because we're going from known numbers + // to unknown. + missingChapterNumberEdges.add(MissingChapterNumberEdge(right, aboveIsKnown = true, belowIsKnown = false)) + } + + else -> { + // SwitchingPoint contract's guarantees: leftIsKnown = false, rightIsKnown = true + + // we were on "unknown" territory, and going to known + // subject of discussion is the left one (the unknown). + // there is a special case in which the unknown chapter is only one + // with known numbers in both directions. + // we need to account for that by checking if the last added member + // of missingChapterNumberEdges (if any) has index equal to "left" element + // (the subject, unknown) + // in which case we replace it, with a bi-directional MissingChapterNumberEdge + val last: MissingChapterNumberEdge? 
= missingChapterNumberEdges.lastOrNull() + when (last?.index == left) { + true -> { + // surrounded, replace + missingChapterNumberEdges[missingChapterNumberEdges.lastIndex] = MissingChapterNumberEdge(left, aboveIsKnown = true, belowIsKnown = true) + } + + else -> { + // 2 or more unknown sequence + missingChapterNumberEdges.add(MissingChapterNumberEdge(left, aboveIsKnown = false, belowIsKnown = true)) + } + } + } + } + } + + // previous chapter number + fun ChapterNumber.predictBelow(): ChapterNumber = when (sub) { + 0u -> ChapterNumber(main - 1u, 0u) // before chapter 18, chapter 17 + 5u -> ChapterNumber(main, 0u) // before chapter 18.5, chapter 18 + else -> ChapterNumber(main, sub - 1u) // before chapter 18.4, chapter 18.3 + } + + // next chapter number + fun ChapterNumber.predictAbove(): ChapterNumber = when (sub) { + 0u, 5u -> ChapterNumber(main + 1u, 0u) // after chapter 17 or 17.5, chapter 18 + else -> ChapterNumber(main, sub + 1u) // after chapter 18.3, 18.4 + } + + fun MissingChapterNumberEdge.indexAbove(): Int = index - 1 + fun MissingChapterNumberEdge.indexBelow(): Int = index + 1 + + val result: MutableList = ArrayList(this) + while (missingChapterNumberEdges.isNotEmpty()) { + val edge: MissingChapterNumberEdge = missingChapterNumberEdges.removeFirst() + + when { + edge.aboveIsKnown && edge.belowIsKnown -> { + // both are known + val above: BookChapter = result[edge.indexAbove()] + val below: BookChapter = result[edge.indexBelow()] + + val inferredByDecreasing = above.chapterNumber!!.predictBelow() + val inferredByIncreasing = below.chapterNumber!!.predictAbove() + + when { + above.chapterNumber == below.chapterNumber -> { + reportErrorToUser { "Chapter number inference failed (case 0)!" } + } + + above.chapterNumber < below.chapterNumber -> { + reportErrorToUser { "Chapter number inference failed (case 1)!" 
} + } + + inferredByDecreasing == inferredByIncreasing -> { + // inference agrees from both sides + result[edge.index] = result[edge.index].copy(chapterNumber = inferredByDecreasing) + } + + // might be handled by above, just for safety + inferredByIncreasing >= above.chapterNumber || inferredByDecreasing <= below.chapterNumber -> { + reportErrorToUser { "Chapter number inference failed (case 2)!" } + } + + inferredByDecreasing > inferredByIncreasing -> { + // gap between chapters, take the lowest + result[edge.index] = result[edge.index].copy(chapterNumber = inferredByIncreasing) + } + + else -> { + // inferredByIncreasing > inferredByDecreasing should be handled by branch 2 above + // everything else should be reported to user + reportErrorToUser { "Chapter number inference failed (case 3)!" } + } + } + } + + edge.aboveIsKnown -> { + // only above is known + val above: BookChapter = result[edge.indexAbove()] + val inferredByDecreasing = above.chapterNumber!!.predictBelow() + + // handle this one + result[edge.index] = result[edge.index].copy(chapterNumber = inferredByDecreasing) + + // there are 2 main cases, where + is known, - is unknown, * just changed above and . is anything + // case 1: ..+*-+.. + // case 2: ..+*--.. 
+ when (missingChapterNumberEdges.firstOrNull()?.index == edge.index + 1) { + true -> { + // replace next edge with surrounded + val removed = missingChapterNumberEdges.removeFirst() + missingChapterNumberEdges.addFirst(removed.copy(aboveIsKnown = true, belowIsKnown = false)) + } + + false -> { + // add new edge below current edge's index + missingChapterNumberEdges.addLast(MissingChapterNumberEdge(edge.indexBelow(), aboveIsKnown = true, belowIsKnown = false)) + } + } + } + + edge.belowIsKnown -> { + // only below is known + val below: BookChapter = result[edge.index + 1] + val inferredByIncreasing = below.chapterNumber!!.predictAbove() + + // handle this one + result[edge.index] = result[edge.index].copy(chapterNumber = inferredByIncreasing) + + // there are 2 main cases (like see above): + // case 1: ..+-*+.. + // case 2: ..--*+.. + when (missingChapterNumberEdges.lastOrNull()?.index == edge.index - 1) { + true -> { + // replace last edge with surrounded + val removed = missingChapterNumberEdges.removeLast() + missingChapterNumberEdges.addLast(removed.copy(aboveIsKnown = true, belowIsKnown = true)) + } + + false -> { + // add new edge above current edge's index + missingChapterNumberEdges.addLast(MissingChapterNumberEdge(edge.indexAbove(), aboveIsKnown = false, belowIsKnown = true)) + } + } + } + + else -> { + // shouldn't be possible + reportErrorToUser { "Chapter number inference failed (case 4)!" } + } + } + } + + return result + } + + /** + * ThreadLocal [SimpleDateFormat] (SimpleDateFormat is not thread safe). 
+ */ + private val absoluteDateFormat: ThreadLocal = object : ThreadLocal() { + override fun initialValue() = runCatching { SimpleDateFormat("MMMM dd, yyyy", Locale.US) }.fold( + onSuccess = { it }, + onFailure = { reportErrorToUser { "Invalid SimpleDateFormat(MMMM dd, yyyy)" } }, + ) + } + + private val relativeChapterDateRegex = """(\d+)\s+(years?|months?|weeks?|days?|hours?|mins?|minutes?|seconds?|sec)\s+ago""".toRegex(RegexOption.IGNORE_CASE) + + /** + * Tries to parse a possibly human-readable relative [Date]. + * + * @see Calendar + */ + private fun String.tryAnalyzeChapterDate(): Date? { + return when (val match = relativeChapterDateRegex.matchEntire(trim())) { + null -> { + absoluteDateFormat.get() + .runCatching { this!!.parse(this@tryAnalyzeChapterDate) } + .fold( + onSuccess = { it }, + onFailure = { reportErrorToUser { "Could not parse date: $this" } }, + ) + } + + else -> { + // relative + val number: Int = match.groupValues[1].toInt() + val relativity: String = match.groupValues[2] + val cal: Calendar = Calendar.getInstance(TimeZone.getDefault(), Locale.US) + + with(relativity) { + when { + startsWith("year") -> cal.add(Calendar.YEAR, -number) + startsWith("month") -> cal.add(Calendar.MONTH, -number) + startsWith("week") -> cal.add(Calendar.DAY_OF_MONTH, -number * 7) + startsWith("day") -> cal.add(Calendar.DAY_OF_MONTH, -number) + startsWith("hour") -> cal.add(Calendar.HOUR, -number) + startsWith("min") -> cal.add(Calendar.MINUTE, -number) + startsWith("sec") -> cal.add(Calendar.SECOND, -number) + } + } + + cal.time + } + } + } +} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt deleted file mode 100644 index 3f6840dde..000000000 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/NormalizedURL.kt +++ /dev/null @@ -1,54 +0,0 @@ -package eu.kanade.tachiyomi.extension.all.projectsuki - 
-import okhttp3.HttpUrl -import okhttp3.HttpUrl.Companion.toHttpUrl -import okhttp3.HttpUrl.Companion.toHttpUrlOrNull -import org.jsoup.nodes.Element - -typealias NormalizedURL = HttpUrl - -val NormalizedURL.rawAbsolute: String - get() = toString() - -private val psDomainURI = """https://projectsuki.com/""".toHttpUrl().toUri() - -val NormalizedURL.rawRelative: String? - get() { - val uri = toUri() - return psDomainURI - .relativize(uri) - .takeIf { it != uri } - ?.let { """/$it""" } - } - -private val protocolMatcher = """^https?://""".toRegex() -private val domainMatcher = """^https?://(?:[a-zA-Z\d\-]+\.)+[a-zA-Z\d\-]+""".toRegex() -fun String.toNormalURL(): NormalizedURL? { - if (contains(':') && !contains(protocolMatcher)) { - return null - } - - val toParse = StringBuilder() - - if (!contains(domainMatcher)) { - toParse.append("https://projectsuki.com") - if (!this.startsWith("/")) toParse.append('/') - } - - toParse.append(this) - - return toParse.toString().toHttpUrlOrNull() -} - -fun NormalizedURL.pathStartsWith(other: Iterable): Boolean = pathSegments.zip(other).all { (l, r) -> l == r } - -fun NormalizedURL.isPSUrl() = host.endsWith("${PS.identifier}.com") - -fun NormalizedURL.isBookURL() = isPSUrl() && pathSegments.first() == "book" -fun NormalizedURL.isReadURL() = isPSUrl() && pathStartsWith(PS.chapterPath) -fun NormalizedURL.isImagesGalleryURL() = isPSUrl() && pathStartsWith(PS.pagePath) - -fun Element.attrNormalizedUrl(attrName: String): NormalizedURL? 
{ - val attrValue = attr("abs:$attrName").takeIf { it.isNotBlank() } ?: return null - return attrValue.toNormalURL() -} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt deleted file mode 100644 index 68c4dd935..000000000 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PS.kt +++ /dev/null @@ -1,129 +0,0 @@ -@file:Suppress("MayBeConstant", "unused") - -package eu.kanade.tachiyomi.extension.all.projectsuki - -import org.jsoup.nodes.Element -import java.util.Calendar -import java.util.Locale -import kotlin.concurrent.getOrSet - -@Suppress("MemberVisibilityCanBePrivate") -internal object PS { - const val identifier: String = "projectsuki" - const val identifierShort: String = "ps" - - val bookPath = listOf("book") - val pagePath = listOf("images", "gallery") - val chapterPath = listOf("read") - - const val SEARCH_INTENT_PREFIX: String = "$identifierShort:" - - const val PREFERENCE_WHITELIST_LANGUAGES = "$identifier-languages-whitelist" - const val PREFERENCE_WHITELIST_LANGUAGES_TITLE = "Whitelist the following languages:" - const val PREFERENCE_WHITELIST_LANGUAGES_SUMMARY = - "Will keep project chapters in the following languages." + - " Takes precedence over blacklisted languages." + - " It will match the string present in the \"Language\" column of the chapter." + - " Whitespaces will be trimmed." + - " Leave empty to allow all languages." + - " Separate each entry with a comma ','" - - const val PREFERENCE_BLACKLIST_LANGUAGES = "$identifier-languages-blacklist" - const val PREFERENCE_BLACKLIST_LANGUAGES_TITLE = "Blacklist the following languages:" - const val PREFERENCE_BLACKLIST_LANGUAGES_SUMMARY = - "Will hide project chapters in the following languages." + - " Works identically to whitelisting." 
-} - -fun Element.containsBookLinks(): Boolean = select("a").any { - it.attrNormalizedUrl("href")?.isBookURL() == true -} - -fun Element.containsReadLinks(): Boolean = select("a").any { - it.attrNormalizedUrl("href")?.isReadURL() == true -} - -fun Element.containsImageGalleryLinks(): Boolean = select("a").any { - it.attrNormalizedUrl("href")?.isImagesGalleryURL() == true -} - -fun Element.getAllUrlElements(selector: String, attrName: String, predicate: (NormalizedURL) -> Boolean): Map { - return select(selector) - .mapNotNull { element -> element.attrNormalizedUrl(attrName)?.let { element to it } } - .filter { (_, url) -> predicate(url) } - .toMap() -} - -fun Element.getAllBooks(): Map { - val bookUrls = getAllUrlElements("a", "href") { it.isBookURL() } - val byID: Map> = bookUrls.groupBy { (_, url) -> url.pathSegments[1] /* /book/ */ } - - @Suppress("UNCHECKED_CAST") - return byID.mapValues { (bookid, elements) -> - val thumb: Element? = elements.entries.firstNotNullOfOrNull { (element, _) -> - element.select("img").firstOrNull() - } - val title = elements.entries.firstOrNull { (element, _) -> - element.select("img").isEmpty() && element.text().let { - it.isNotBlank() && it.lowercase(Locale.US) != "show more" - } - } - - if (thumb != null && title != null) { - PSBook(thumb, title.key, title.key.text(), bookid, title.value) - } else { - null - } - }.filterValues { it != null } as Map -} - -inline fun Map.groupBy(keySelector: (Map.Entry) -> SK): Map> = buildMap<_, MutableMap> { - this@groupBy.entries.forEach { entry -> - getOrPut(keySelector(entry)) { HashMap() }[entry.key] = entry.value - } -} - -private val absoluteDateFormat: ThreadLocal = ThreadLocal() -fun String.parseDate(ifFailed: Long = 0L): Long { - return when { - endsWith("ago") -> { - // relative - val number = takeWhile { it.isDigit() }.toInt() - val cal = Calendar.getInstance() - - when { - contains("day") -> cal.apply { add(Calendar.DAY_OF_MONTH, -number) } - contains("hour") -> cal.apply { 
add(Calendar.HOUR, -number) } - contains("minute") -> cal.apply { add(Calendar.MINUTE, -number) } - contains("second") -> cal.apply { add(Calendar.SECOND, -number) } - contains("week") -> cal.apply { add(Calendar.DAY_OF_MONTH, -number * 7) } - contains("month") -> cal.apply { add(Calendar.MONTH, -number) } - contains("year") -> cal.apply { add(Calendar.YEAR, -number) } - else -> null - }?.timeInMillis ?: ifFailed - } - - else -> { - // absolute? - absoluteDateFormat.getOrSet { java.text.SimpleDateFormat("MMMM dd, yyyy", Locale.US) }.parse(this)?.time ?: ifFailed - } - } -} - -private val imageExtensions = setOf(".jpg", ".png", ".jpeg", ".webp", ".gif", ".avif", ".tiff") -private val simpleSrcVariants = listOf("src", "data-src", "data-lazy-src") -fun Element.imgNormalizedURL(): NormalizedURL? { - simpleSrcVariants.forEach { variant -> - if (hasAttr(variant)) { - return attrNormalizedUrl(variant) - } - } - - if (hasAttr("srcset")) { - return attr("abs:srcset").substringBefore(" ").toNormalURL() - } - - return attributes().firstOrNull { - it.key.contains("src") && imageExtensions.any { ext -> it.value.contains(ext) } - }?.value?.substringBefore(" ")?.toNormalURL() -} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt deleted file mode 100644 index 87198dee1..000000000 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSBook.kt +++ /dev/null @@ -1,11 +0,0 @@ -package eu.kanade.tachiyomi.extension.all.projectsuki - -import org.jsoup.nodes.Element - -data class PSBook( - val imgElement: Element, - val titleElement: Element, - val title: String, - val mangaID: String, - val url: NormalizedURL, -) diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt deleted file mode 100644 index dfa2d1646..000000000 
--- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PSFilters.kt +++ /dev/null @@ -1,90 +0,0 @@ -@file:Suppress("CanSealedSubClassBeObject") - -package eu.kanade.tachiyomi.extension.all.projectsuki - -import eu.kanade.tachiyomi.source.model.Filter -import okhttp3.HttpUrl - -@Suppress("NOTHING_TO_INLINE") -object PSFilters { - internal sealed interface AutoFilter { - fun applyTo(builder: HttpUrl.Builder) - } - - private inline fun HttpUrl.Builder.setAdv() = setQueryParameter("adv", "1") - - class Author : Filter.Text("Author"), AutoFilter { - - override fun applyTo(builder: HttpUrl.Builder) { - when { - state.isNotBlank() -> builder.setAdv().addQueryParameter("author", state) - } - } - - companion object { - val ownHeader by lazy { Header("Cannot search by multiple authors") } - } - } - - class Artist : Filter.Text("Artist"), AutoFilter { - - override fun applyTo(builder: HttpUrl.Builder) { - when { - state.isNotBlank() -> builder.setAdv().addQueryParameter("artist", state) - } - } - - companion object { - val ownHeader by lazy { Header("Cannot search by multiple artists") } - } - } - - class Status : Filter.Select("Status", Value.values()), AutoFilter { - enum class Value(val display: String, val query: String) { - ANY("Any", ""), - ONGOING("Ongoing", "ongoing"), - COMPLETED("Completed", "completed"), - HIATUS("Hiatus", "hiatus"), - CANCELLED("Cancelled", "cancelled"), - ; - - override fun toString(): String = display - - companion object { - private val values: Array = values() - operator fun get(ordinal: Int) = values[ordinal] - } - } - - override fun applyTo(builder: HttpUrl.Builder) { - when (val state = Value[state]) { - Value.ANY -> {} // default, do nothing - else -> builder.setAdv().addQueryParameter("status", state.query) - } - } - } - - class Origin : Filter.Select("Origin", Value.values()), AutoFilter { - enum class Value(val display: String, val query: String?) 
{ - ANY("Any", null), - KOREA("Korea", "kr"), - CHINA("China", "cn"), - JAPAN("Japan", "jp"), - ; - - override fun toString(): String = display - - companion object { - private val values: Array = Value.values() - operator fun get(ordinal: Int) = values[ordinal] - } - } - - override fun applyTo(builder: HttpUrl.Builder) { - when (val state = Value[state]) { - Value.ANY -> {} // default, do nothing - else -> builder.setAdv().addQueryParameter("origin", state.query) - } - } - } -} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt new file mode 100644 index 000000000..734b821c7 --- /dev/null +++ b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/PathPattern.kt @@ -0,0 +1,84 @@ +package eu.kanade.tachiyomi.extension.all.projectsuki + +import okhttp3.HttpUrl + +/** + * @see EXTENSION_INFO Found in ProjectSuki.kt + */ +@Suppress("unused") +private inline val INFO: Nothing get() = error("INFO") + +/** + * Utility class made to help identify different urls. + * + * null regex means wildcard, matches anything. + * + * Meant to be used with [matchAgainst], will match against [HttpUrl.pathSegments] + * + * @author Federico d'Alonzo <me@npgx.dev> + */ +data class PathPattern(val paths: List) { + constructor(vararg paths: Regex?) : this(paths.asList()) + + init { + if (paths.isEmpty()) { + reportErrorToUser { + "Invalid PathPattern, cannot be empty!" + } + } + } +} + +/** + * Utility class to represent the [MatchResult]s obtained when matching a [PathPattern] + * against an [HttpUrl]. 
+ * + * When [matchResults] is null, it means the [HttpUrl] either: + * - when `allowSubPaths` in [matchAgainst] is `false`: [HttpUrl.pathSegments]`.size` != [PathPattern.paths]`.size` + * - when `allowSubPaths` in [matchAgainst] is `true`: [HttpUrl.pathSegments]`.size` < [PathPattern.paths]`.size` + * + * @see matchAgainst + * + * @author Federico d'Alonzo <me@npgx.dev> + */ +data class PathMatchResult(val doesMatch: Boolean, val matchResults: List?) { + operator fun get(name: String): MatchGroup? = matchResults?.firstNotNullOfOrNull { + it?.groups + // this throws if the group by "name" isn't found AND can return null too + ?.runCatching { get(name) } + ?.getOrNull() + } + + init { + if (matchResults?.isEmpty() == true) { + reportErrorToUser { + "Invalid PathMatchResult, matchResults must either be null or not empty!" + } + } + } +} + +/** + * @see PathPattern + * @see PathMatchResult + */ +fun HttpUrl.matchAgainst(pattern: PathPattern, allowSubPaths: Boolean = false, ignoreEmptySegments: Boolean = true): PathMatchResult { + val actualSegments: List = if (ignoreEmptySegments) pathSegments.filter { it.isNotBlank() } else pathSegments + val sizeReq = when (allowSubPaths) { + false -> actualSegments.size == pattern.paths.size + true -> actualSegments.size >= pattern.paths.size + } + + if (!sizeReq) return PathMatchResult(false, null) + + val matchResults: MutableList = ArrayList() + var matches = true + + actualSegments.zip(pattern.paths) { segment, regex -> + val match: MatchResult? 
= regex?.matchEntire(segment) + matchResults.add(match) + matches = matches && (regex == null || match != null) + } + + return PathMatchResult(matches, matchResults) +} diff --git a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt index 0dbf62fc6..c824e5970 100644 --- a/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt +++ b/src/all/projectsuki/src/eu/kanade/tachiyomi/extension/all/projectsuki/ProjectSuki.kt @@ -1,15 +1,10 @@ package eu.kanade.tachiyomi.extension.all.projectsuki -import android.app.Application -import android.content.SharedPreferences -import androidx.preference.EditTextPreference import androidx.preference.PreferenceScreen -import eu.kanade.tachiyomi.lib.randomua.addRandomUAPreferenceToScreen import eu.kanade.tachiyomi.lib.randomua.getPrefCustomUA import eu.kanade.tachiyomi.lib.randomua.getPrefUAType import eu.kanade.tachiyomi.lib.randomua.setRandomUserAgent import eu.kanade.tachiyomi.network.GET -import eu.kanade.tachiyomi.network.POST import eu.kanade.tachiyomi.network.asObservableSuccess import eu.kanade.tachiyomi.network.interceptor.rateLimit import eu.kanade.tachiyomi.source.ConfigurableSource @@ -22,241 +17,441 @@ import eu.kanade.tachiyomi.source.model.SManga import eu.kanade.tachiyomi.source.model.UpdateStrategy import eu.kanade.tachiyomi.source.online.HttpSource import eu.kanade.tachiyomi.util.asJsoup -import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json -import kotlinx.serialization.json.jsonObject -import kotlinx.serialization.json.jsonPrimitive import okhttp3.HttpUrl import okhttp3.HttpUrl.Companion.toHttpUrl -import okhttp3.MediaType.Companion.toMediaType +import okhttp3.HttpUrl.Companion.toHttpUrlOrNull import okhttp3.OkHttpClient import okhttp3.Request -import okhttp3.RequestBody.Companion.toRequestBody import okhttp3.Response -import 
org.jsoup.Jsoup -import org.jsoup.nodes.Element +import org.jsoup.nodes.Document import rx.Observable -import uy.kohesive.injekt.Injekt -import uy.kohesive.injekt.api.get +import java.net.URI import java.util.Locale +import java.util.concurrent.TimeUnit +import kotlin.math.floor +import kotlin.math.log10 +import kotlin.math.pow +/** + * [Project Suki](https://projectsuki.com) + * [Tachiyomi](https://github.com/tachiyomiorg/tachiyomi) + * [extension](https://github.com/tachiyomiorg/tachiyomi-extensions) + * + * Most of the code should be documented, `@author` KDoc tags are mostly to know + * who to bother *when necessary*. + * If you contributed to this extension, be sure to add yourself in an `@author` tag! + * + * If you want to understand how this extension works, + * I recommend first looking at [ProjectSuki], then [DataExtractor], + * then the rest of the project. + */ +internal inline val EXTENSION_INFO: Nothing get() = error("EXTENSION_INFO") + +internal const val SHORT_FORM_ID: String = """ps""" + +internal val homepageUrl: HttpUrl = "https://projectsuki.com".toHttpUrl() +internal val homepageUri: URI = homepageUrl.toUri() + +/** PATTERN: `https://projectsuki.com/book/` */ +internal val bookUrlPattern = PathPattern( + """book""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/browse/` */ +@Suppress("unused") +internal val browsePattern = PathPattern( + """browse""".toRegex(RegexOption.IGNORE_CASE), + """(?\d+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** + * PATTERN: `https://projectsuki.com/read///` + * + * `` is actually a filter of sorts that will remove pages < ``'s value. 
+ */ +internal val chapterUrlPattern = PathPattern( + """read""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** + * PATTERNS: + * - `https://projectsuki.com/images/gallery//thumb` + * - `https://projectsuki.com/images/gallery//thumb.` + * - `https://projectsuki.com/images/gallery//-thumb` + * - `https://projectsuki.com/images/gallery//-thumb.` + */ +internal val thumbnailUrlPattern = PathPattern( + """images""".toRegex(RegexOption.IGNORE_CASE), + """gallery""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?\d+-)?thumb(?:\.(?.+))?""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/images/gallery///` */ +internal val pageUrlPattern = PathPattern( + """images""".toRegex(RegexOption.IGNORE_CASE), + """gallery""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/genre/` */ +internal val genreSearchUrlPattern = PathPattern( + """genre""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** PATTERN: `https://projectsuki.com/group/` */ +@Suppress("unused") +internal val groupUrlPattern = PathPattern( + """group""".toRegex(RegexOption.IGNORE_CASE), + """(?.+)""".toRegex(RegexOption.IGNORE_CASE), +) + +/** + * Used on the website when there's an image loading error, could be used in extension. 
+ */ +@Suppress("unused") +internal val emptyImageUrl: HttpUrl = homepageUrl.newBuilder() + .addPathSegment("images") + .addPathSegment("gallery") + .addPathSegment("empty.jpg") + .build() + +/** + * Removes the [URL's](https://en.wikipedia.org/wiki/URL) host and scheme/protocol, + * leaving only the path, query and fragment, *without leading `/`* + * + * @see URI.relativize + */ +internal val HttpUrl.rawRelative: String? + get() { + val uri = toUri() + val relative = homepageUri.relativize(uri) + return when { + uri === relative -> null + else -> relative.toASCIIString() + } + } + +internal val reportPrefix: String + get() = """Error! Report on GitHub (tachiyomiorg/tachiyomi-extensions)""" + +/** Just throw an [error], which will get caught by Tachiyomi: the message will be exposed as a [toast][android.widget.Toast]. */ +internal inline fun reportErrorToUser(message: () -> String): Nothing { + error("""$reportPrefix: ${message()}""") +} + +/** Used when chapters don't have a [Language][DataExtractor.ChaptersTableColumnDataType.Language] column (if that ever happens). */ +internal const val UNKNOWN_LANGUAGE: String = "unknown" + +/** + * Actual Tachiyomi extension, ties everything together. + * + * Most of the work happens in [DataExtractor], [ProjectSukiAPI], [ProjectSukiFilters] and [ProjectSukiPreferences]. 
+ * + * @author Federico d'Alonzo <me@npgx.dev> + */ @Suppress("unused") class ProjectSuki : HttpSource(), ConfigurableSource { + override val name: String = "Project Suki" - override val baseUrl: String = "https://projectsuki.com" - override val lang: String = "en" + override val baseUrl: String = homepageUri.toASCIIString() + override val lang: String = "all" + override val id: Long = 8965918600406781666L - private val preferences: SharedPreferences by lazy { - Injekt.get().getSharedPreferences("source_$id", 0x0000) + /** Handles extension preferences found in Extensions > Project Suki > Gear icon */ + private val preferences = ProjectSukiPreferences(id) + + /** See [Kotlinx-Serialization](https://github.com/Kotlin/kotlinx.serialization). */ + private val json: Json = Json { + ignoreUnknownKeys = true + explicitNulls = true + encodeDefaults = true } - private fun String.processLangPref(): List = split(",").map { it.trim().lowercase(Locale.US) } - - private val SharedPreferences.whitelistedLanguages: List - get() = getString(PS.PREFERENCE_WHITELIST_LANGUAGES, "")!! - .processLangPref() - - private val SharedPreferences.blacklistedLanguages: List - get() = getString(PS.PREFERENCE_BLACKLIST_LANGUAGES, "")!! - .processLangPref() - override fun setupPreferenceScreen(screen: PreferenceScreen) { - addRandomUAPreferenceToScreen(screen) + with(preferences) { screen.configure() } + } - screen.addPreference( - EditTextPreference(screen.context).apply { - key = PS.PREFERENCE_WHITELIST_LANGUAGES - title = PS.PREFERENCE_WHITELIST_LANGUAGES_TITLE - summary = PS.PREFERENCE_WHITELIST_LANGUAGES_SUMMARY - }, + /** + * [OkHttp's](https://square.github.io/okhttp/) [OkHttpClient] that handles network requests and responses. 
+ * + * Thanks to Tachiyomi's [NetworkHelper](https://github.com/tachiyomiorg/tachiyomi/blob/58daedc89ee18d04e7af5bab12629680dba4096c/core/src/main/java/eu/kanade/tachiyomi/network/NetworkHelper.kt#L21C12-L21C12) + * (this is a permalink, check for updated version), + * most client options are already set as they should be, including the [Cache][okhttp3.Cache]. + */ + override val client: OkHttpClient = network.client.newBuilder() + .setRandomUserAgent( + userAgentType = preferences.shared.getPrefUAType(), + customUA = preferences.shared.getPrefCustomUA(), ) + .rateLimit(2, 1, TimeUnit.SECONDS) + .build() - screen.addPreference( - EditTextPreference(screen.context).apply { - key = PS.PREFERENCE_BLACKLIST_LANGUAGES - title = PS.PREFERENCE_BLACKLIST_LANGUAGES_TITLE - summary = PS.PREFERENCE_BLACKLIST_LANGUAGES_SUMMARY - }, + /** + * Specify what request will be sent to the server. + * + * This specific method returns a [GET](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) + * request to be sent to [https://projectsuki.com/browse](https://projectsuki.com/browse). + * + * Using the default [HttpSource]'s [Headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers). + */ + override fun popularMangaRequest(page: Int) = GET( + homepageUrl.newBuilder() + .addPathSegment("browse") + .addPathSegment((page - 1).toString()) // starts at 0 + .build(), + headers, + ) + + /** Whether or not this extension supports the "Latest" tab. */ + override val supportsLatest: Boolean get() = true + + /** Same concept as [popularMangaRequest], but is sent to [https://projectsuki.com/](https://projectsuki.com/). */ + override fun latestUpdatesRequest(page: Int) = GET(homepageUrl, headers) + + /** + * Utility to find and apply a filter specified by [T], + * see [reified](https://kotlinlang.org/docs/inline-functions.html#reified-type-parameters) + * if you're not familiar with the concept. 
+ */ + private inline fun HttpUrl.Builder.applyPSFilter( + from: FilterList, + ): HttpUrl.Builder where T : Filter<*>, T : ProjectSukiFilters.ProjectSukiFilter = apply { + from.firstNotNullOfOrNull { it as? T }?.run { applyFilter() } + } + + /** + * Same concept as [popularMangaRequest], but is sent to [https://projectsuki.com/search](https://projectsuki.com/search). + * This is the [Full-Site][ProjectSukiFilters.SearchMode.FULL_SITE] variant of search, it *will* return results that have no chapters. + */ + override fun searchMangaRequest(page: Int, query: String, filters: FilterList): Request { + return GET( + homepageUrl.newBuilder() + .addPathSegment("search") + .addQueryParameter("page", (page - 1).toString()) + .addQueryParameter("q", query) + .applyPSFilter(from = filters) + .applyPSFilter(from = filters) + .applyPSFilter(from = filters) + .applyPSFilter(from = filters) + .build(), + headers, ) } - override val client: OkHttpClient = network.cloudflareClient.newBuilder() - .setRandomUserAgent( - userAgentType = preferences.getPrefUAType(), - customUA = preferences.getPrefCustomUA(), - filterInclude = listOf("chrome"), - ) - .rateLimit(4) - .build() - - override fun popularMangaRequest(page: Int) = GET(baseUrl, headers) - - // differentiating between popular and latest manga in the main page is - // *theoretically possible* but a pain, as such, this is fine "for now" + /** + * Handles the server's [Response] that was returned from [popularMangaRequest]'s [Request]. + * + * Because we asked the server for a webpage, it will return, in the [Request's body][okhttp3.RequestBody], + * the [html](https://developer.mozilla.org/en-US/docs/Web/HTML) that makes up that page, + * including any [css](https://developer.mozilla.org/en-US/docs/Web/CSS) and + * [JavaScript](https://developer.mozilla.org/en-US/docs/Web/JavaScript) in `