MMRCMS - refactor latest parsing, fix Mangas.pw (#4201)

This commit is contained in:
Mike 2020-08-24 22:39:29 -04:00 committed by GitHub
parent 04574b2084
commit 1a86045f3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 14 deletions

View File

@ -5,7 +5,7 @@ ext {
extName = 'My Manga Reader CMS (Many sources)' extName = 'My Manga Reader CMS (Many sources)'
pkgNameSuffix = 'all.mmrcms' pkgNameSuffix = 'all.mmrcms'
extClass = '.MyMangaReaderCMSSources' extClass = '.MyMangaReaderCMSSources'
extVersionCode = 50 extVersionCode = 51
libVersion = '1.2' libVersion = '1.2'
containsNsfw = true containsNsfw = true
} }

View File

@ -139,26 +139,35 @@ open class MyMangaReaderCMSSource(
val mangas = document.select(latestUpdatesSelector()) val mangas = document.select(latestUpdatesSelector())
.let { elements -> .let { elements ->
if (elements.select("a").firstOrNull()?.hasText() == true) { when {
elements.map { latestUpdatesFromElement(it) } // Mangas.pw
} else { elements.select("a.fa-info-circle + a").firstOrNull()?.hasText() == true -> elements.map { latestUpdatesFromElement(it, "a.fa-info-circle + a") }
document.select(gridLatestUpdatesSelector()).map { gridLatestUpdatesFromElement(it) } // List layout (most sources)
elements.select("a").firstOrNull()?.hasText() == true -> elements.map { latestUpdatesFromElement(it, "a") }
// Grid layout (e.g. MangaYu and MangaID)
else -> document.select(gridLatestUpdatesSelector()).map { gridLatestUpdatesFromElement(it) }
} }
} }
.distinctBy { manga -> manga.title } .filterNotNull()
.filterNot { manga -> manga.title in latestTitles }
.also { list -> latestTitles.addAll(list.map { it.title }) }
return MangasPage(mangas, document.select(latestUpdatesNextPageSelector()) != null) return MangasPage(mangas, document.select(latestUpdatesNextPageSelector()) != null)
} }
private fun latestUpdatesSelector() = "div.mangalist div.manga-item" private fun latestUpdatesSelector() = "div.mangalist div.manga-item"
private fun latestUpdatesNextPageSelector() = "a[rel=next]" private fun latestUpdatesNextPageSelector() = "a[rel=next]"
private fun latestUpdatesFromElement(element: Element): SManga = SManga.create().apply { private fun latestUpdatesFromElement(element: Element, urlSelector: String): SManga? {
url = element.select("a").first().attr("abs:href").substringAfter(baseUrl) // intentionally not using setUrlWithoutDomain return element.select(urlSelector).first().let { titleElement ->
title = element.select("a").first().text().trim() if (titleElement.text() in latestTitles) {
null
} else {
latestTitles.add(titleElement.text())
SManga.create().apply {
url = titleElement.attr("abs:href").substringAfter(baseUrl) // intentionally not using setUrlWithoutDomain
title = titleElement.text().trim()
thumbnail_url = "$baseUrl/uploads/manga/${url.substringAfterLast('/')}/cover/cover_250x350.jpg" thumbnail_url = "$baseUrl/uploads/manga/${url.substringAfterLast('/')}/cover/cover_250x350.jpg"
} }
// MangaYu and MangaID needs this }
}
}
private fun gridLatestUpdatesSelector() = "div.mangalist div.manga-item, div.grid-manga tr" private fun gridLatestUpdatesSelector() = "div.mangalist div.manga-item, div.grid-manga tr"
private fun gridLatestUpdatesFromElement(element: Element): SManga = SManga.create().apply { private fun gridLatestUpdatesFromElement(element: Element): SManga = SManga.create().apply {
element.select("a.chart-title").let { element.select("a.chart-title").let {
@ -308,7 +317,7 @@ open class MyMangaReaderCMSSource(
val chapter = SChapter.create() val chapter = SChapter.create()
try { try {
val titleWrapper = if (name == "Mangas.pw") element.select("em a[alt]").first() else element.select("[class^=chapter-title-rtl]").first() val titleWrapper = if (name == "Mangas.pw") element.select("i a").first() else element.select("[class^=chapter-title-rtl]").first()
// Some websites add characters after "..-rtl" thus the need of checking classes that starts with that // Some websites add characters after "..-rtl" thus the need of checking classes that starts with that
val url = titleWrapper.getElementsByTag("a") val url = titleWrapper.getElementsByTag("a")
.first { it.attr("href").contains(urlRegex) } .first { it.attr("href").contains(urlRegex) }