diff --git a/src/pt/mangahost/build.gradle b/src/pt/mangahost/build.gradle index 63671a598..e3c6d6b42 100644 --- a/src/pt/mangahost/build.gradle +++ b/src/pt/mangahost/build.gradle @@ -5,12 +5,8 @@ ext { appName = 'Tachiyomi: MangaHost' pkgNameSuffix = 'pt.mangahost' extClass = '.MangaHost' - extVersionCode = 1 + extVersionCode = 2 libVersion = '1.2' } -sourceSets { - main.java.srcDirs += 'src/eu/kanade/tachiyomi/extension/pt/mangahost' -} - apply from: "$rootDir/common.gradle" diff --git a/src/pt/mangahost/src/eu/kanade/tachiyomi/extension/pt/mangahost/MangaHost.kt b/src/pt/mangahost/src/eu/kanade/tachiyomi/extension/pt/mangahost/MangaHost.kt index 35a89b1c3..afdde1daf 100644 --- a/src/pt/mangahost/src/eu/kanade/tachiyomi/extension/pt/mangahost/MangaHost.kt +++ b/src/pt/mangahost/src/eu/kanade/tachiyomi/extension/pt/mangahost/MangaHost.kt @@ -1,17 +1,15 @@ package eu.kanade.tachiyomi.extension.pt.mangahost - import eu.kanade.tachiyomi.network.GET - import eu.kanade.tachiyomi.source.model.* - import eu.kanade.tachiyomi.source.online.ParsedHttpSource - import eu.kanade.tachiyomi.util.asJsoup - import okhttp3.* - import org.jsoup.nodes.Document - import org.jsoup.nodes.Element - import java.text.ParseException - import java.text.SimpleDateFormat - import java.util.* - import java.util.regex.Matcher - import java.util.regex.Pattern +import eu.kanade.tachiyomi.network.GET +import eu.kanade.tachiyomi.source.model.* +import eu.kanade.tachiyomi.source.online.ParsedHttpSource +import okhttp3.* +import org.jsoup.Jsoup +import org.jsoup.nodes.Document +import org.jsoup.nodes.Element +import java.text.ParseException +import java.text.SimpleDateFormat +import java.util.Locale class MangaHost : ParsedHttpSource() { @@ -23,70 +21,75 @@ class MangaHost : ParsedHttpSource() { override val supportsLatest = true - private val langRegex: String = "( )?\\(Pt-Br\\)" + private val catalogHeaders = Headers.Builder().apply { + add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36") + add("Host", "mangahost1.com") + add("Referer", baseUrl) + }.build() - override fun popularMangaSelector(): String = "a.pull-left" + private fun mangaFromElement(element: Element, lazy: Boolean = true): SManga = SManga.create().apply { + title = element.attr("title").replace(LANG_REGEX.toRegex(), "") + thumbnail_url = element.select("img.manga").attr(if (lazy) "data-path" else "src") + .replace(IMAGE_REGEX.toRegex(), "_large.") + setUrlWithoutDomain(element.attr("href")) + } + + override fun popularMangaSelector(): String = "div.thumbnail div a.pull-left" override fun popularMangaRequest(page: Int): Request { - val pageStr = if (page != 1) "/$page" else "" - return GET("$baseUrl/mangas/mais-visualizados$pageStr", headers) + val pageStr = if (page != 1) "/page/$page" else "" + return GET("$baseUrl/mangas/mais-visualizados$pageStr", catalogHeaders) } - override fun popularMangaFromElement(element: Element) : SManga { - val manga = SManga.create() - element.select("a").last().let { - manga.setUrlWithoutDomain(it.attr("href")) - manga.title = element.getElementsByClass("manga").attr("alt") - } + override fun popularMangaFromElement(element: Element): SManga = mangaFromElement(element) - return manga - } + override fun popularMangaNextPageSelector() = "div.wp-pagenavi:has(a.nextpostslink)" - override fun popularMangaNextPageSelector() = ".paginador.wp-pagenavi a:contains(next)" - - override fun latestUpdatesSelector() = "div.thumbnail" + override fun latestUpdatesSelector() = "table.table-lancamentos > tbody > tr > td:eq(0) > a" override fun latestUpdatesRequest(page: Int): Request { - val pageStr = if (page != 1) "/$page" else "" - return GET("$baseUrl/mangas/novos$pageStr", headers) -} - - override fun latestUpdatesFromElement(element: Element): SManga { - return popularMangaFromElement(element) + val pageStr = if (page != 1) "/page/$page" else "" + return GET("$baseUrl/lancamentos$pageStr", catalogHeaders) } - override fun latestUpdatesNextPageSelector() = "paginador.wp-pagenavi a:contains(next)" + override fun latestUpdatesFromElement(element: Element): SManga = mangaFromElement(element, false) + + override fun latestUpdatesNextPageSelector() = popularMangaNextPageSelector() override fun searchMangaRequest(page: Int, query: String, filters: FilterList): Request { - return GET("$baseUrl"+"find/$query", headers) + // The site sometimes recognize it's a crawler and return only a + // manga called "Robot" if the "find/$query" is used directly. + return GET("$baseUrl/find/?this=$query", catalogHeaders) } - override fun searchMangaSelector() = ".table-hover tr" - - override fun searchMangaFromElement(element: Element): SManga { - return popularMangaFromElement(element) - } + override fun searchMangaSelector() = "table.table-search > tbody > tr > td:eq(0) > a" + override fun searchMangaFromElement(element: Element): SManga = mangaFromElement(element) override fun searchMangaNextPageSelector() = null override fun mangaDetailsParse(document: Document): SManga { - val infoElement = document.select("div.margin-bottom-20").first() + val infoElement = document.select("div#page > section > div > div.pull-left") val manga = SManga.create() - val author = infoElement.select("li:contains(Autor: )").text() + + val author = infoElement.select("li:contains(Autor:)").text() manga.author = removeLabel(author) - val artist = infoElement.select("li:contains(Desenho (Art): )").text() + val artist = infoElement.select("li:contains(Desenho (Art):)").text() manga.artist = removeLabel(artist) - val genre = infoElement.select("li:contains(Categoria(s): )").text() + val genre = infoElement.select("li:contains(Categoria(s):)").text() manga.genre = removeLabel(genre) - manga.description = infoElement.select("div#divSpdInText").text() + // Some mangas like Shingeki no Kyojin have some links in description. + manga.description = infoElement.select("article").first() + ?.text()?.substringBefore("Relacionados:") - manga.status = infoElement.select("li:contains(Status: )").first()?.text().orEmpty().let { parseStatus(it) } - manga.thumbnail_url = infoElement.select(".thumbnail").first()?.attr("src") + manga.status = infoElement.select("li:contains(Status:)").text() + .orEmpty().let { parseStatus(it) } + + manga.thumbnail_url = document.select("div#page > section > div > img.thumbnail").attr("src") return manga } @@ -97,87 +100,79 @@ class MangaHost : ParsedHttpSource() { else -> SManga.UNKNOWN } - private fun removeLabel(text: String?): String { - return text!!.substring(text!!.indexOf(":") + 1) - } + private fun removeLabel(text: String?): String = text!!.substringAfter(":") - override fun chapterListSelector() :String { - return "section.clearfix.margin-bottom-20 ul.list_chapters li," + - "section.clearfix.margin-bottom-20 table.table-hover.table-condensed tbody tr" - } + override fun chapterListSelector(): String + = "ul.list_chapters li a," + + "table.table-hover:not(.table-mangas) > tbody > tr" override fun chapterFromElement(element: Element): SChapter { + val isNewLayout = element.tagName() == "a" - val chapter: SChapter = SChapter.create() + if (isNewLayout) { + val content = Jsoup.parse(element.attr("data-content")) + val date = content.select("small.clearfix").text().substringAfter("Adicionado em ") - if (element.`is`("li")){ - - val urlChapterLong = element.select("a").toString() - val p2 = Pattern.compile("(?<=href=\\')(.+?)(?=\\')") - val p1 = Pattern.compile("(?<=Ler Online - )(.+)(?=\\[)") - val p3 = Pattern.compile("(?<=Adicionado em )(.+)(?=\\<)") - val p4 = Pattern.compile("(?<=Traduzido por \\<strong\\>\t\t\t\t\t\t)(.+)(?=\t\t\t\t\\<\\/)") - val m2 = p2.matcher(urlChapterLong) - val m1 = p1.matcher(urlChapterLong) - val m3 = p3.matcher(urlChapterLong) - val m4 = p4.matcher(urlChapterLong) - - if (m2.find() && m1.find() && m3.find() && m4.find()) { - chapter.setUrlWithoutDomain(m2.group()) - chapter.name = m1.group().toString() - chapter.date_upload = m3.group().let { parseChapterDate(it) } - chapter.scanlator = m4.group().toString() + return SChapter.create().apply { + name = element.attr("data-original-title").replace(LANG_REGEX.toRegex(), "") + scanlator = content.select("small.clearfix strong").text() + date_upload = parseChapterDate(date, DATE_FORMAT_NEW) + chapter_number = element.text().toFloatOrNull() ?: 1f + setUrlWithoutDomain(content.select("div.clearfix a").attr("href")) } } - else { + val firstColumn = element.select("td:eq(0)") + val secondColumn = element.select("td:eq(1)") + val thirdColumn = element.select("td:eq(2)") - val urlElement = element.select("a.capitulo").first() - chapter.setUrlWithoutDomain(element.select("a").first().attr("href")) - chapter.name = urlElement.text() - chapter.date_upload = element.select("td:eq(2)").text()?.let { parseChapterDate2(it) }?:0 - chapter.scanlator = element.select("td:eq(1) a").attr("title") + return SChapter.create().apply { + name = firstColumn.select("a").text().replace(LANG_REGEX.toRegex(), "") + scanlator = secondColumn.text() + date_upload = parseChapterDate(thirdColumn.text(), DATE_FORMAT_OLD) + setUrlWithoutDomain(firstColumn.select("a").attr("href")) } - return chapter } - - private fun parseChapterDate(date: String) : Long { + private fun parseChapterDate(date: String, format: String) : Long { return try { - SimpleDateFormat("MMM d, yyyy", Locale.ENGLISH).parse(date).time + SimpleDateFormat(format, Locale.ENGLISH).parse(date).time } catch (e: ParseException) { 0L } } - private fun parseChapterDate2(date: String) : Long { - return try { - SimpleDateFormat("dd/MM/yyyy", Locale.ENGLISH).parse(date).time - } catch (e: ParseException) { - 0L - } + override fun pageListRequest(chapter: SChapter): Request { + // Just to prevent the detection of the crawler. + val newHeader = catalogHeaders.newBuilder() + .set("Referer", "$baseUrl${chapter.url}".substringBeforeLast("/")) + .build() + + return GET(baseUrl + chapter.url, newHeader) } override fun pageListParse(document: Document): List<Page> { + var documentStr = document.toString() + var images = documentStr.substringAfter(SCRIPT_BEGIN).substringBefore(SCRIPT_END) + .replace(SCRIPT_REGEX.toRegex(), "") - val pages = mutableListOf<Page>() - var m : Matcher - var p : Pattern - val pageSize= mutableListOf<String>() - val links = document.select("script").toString() - document.select("div.pull-right > select.pages").first().getElementsByTag("option").forEach{ - pageSize.add(it.attr("value")) - } + var newDocument = Jsoup.parse(images) - for (i in 1 until pageSize.size+1) { - p = Pattern.compile("(?<=\\'img_$i'\\ssrc=')(.+?)(?='\\s)") - m = p.matcher(links) - m.find() - pages.add(Page(i-1, "", m.group(1))) - } - return pages + return newDocument.select("a img") + .mapIndexed { i, el -> Page(i, "", el.attr("src")) } } override fun imageUrlParse(document: Document) = "" + companion object { + private const val LANG_REGEX = "( )?\\((PT-)?BR\\)" + private const val IMAGE_REGEX = "_(small|medium)\\." + + private const val DATE_FORMAT_OLD = "dd/MM/yyyy" + private const val DATE_FORMAT_NEW = "MMM d, yyyy" + + private const val SCRIPT_BEGIN = "var images = [" + private const val SCRIPT_END = "];" + private const val SCRIPT_REGEX = "\"|," + } }