From 02c70d868d160c03ab3d58efd1f07ab30257ac0e Mon Sep 17 00:00:00 2001
From: NerdNumber9
Date: Tue, 3 Jan 2017 16:57:51 -0500
Subject: [PATCH] Implement search engine.

---
 app/build.gradle                              |   3 +
 .../data/source/online/all/EHentaiMetadata.kt |   6 +
 .../java/exh/VerbelExpressionExtensions.kt    |   5 +
 app/src/main/java/exh/search/MultiWildcard.kt |   3 +
 app/src/main/java/exh/search/Namespace.kt     |   4 +
 .../main/java/exh/search/QueryComponent.kt    |   6 +
 app/src/main/java/exh/search/SearchEngine.kt  | 149 ++++++++++++++++++
 .../main/java/exh/search/SingleWildcard.kt    |   3 +
 .../java/exh/search/StringTextComponent.kt    |   3 +
 app/src/main/java/exh/search/Text.kt          |  35 ++++
 app/src/main/java/exh/search/TextComponent.kt |   3 +
 11 files changed, 220 insertions(+)
 create mode 100644 app/src/main/java/exh/VerbelExpressionExtensions.kt
 create mode 100644 app/src/main/java/exh/search/MultiWildcard.kt
 create mode 100644 app/src/main/java/exh/search/Namespace.kt
 create mode 100644 app/src/main/java/exh/search/QueryComponent.kt
 create mode 100644 app/src/main/java/exh/search/SearchEngine.kt
 create mode 100644 app/src/main/java/exh/search/SingleWildcard.kt
 create mode 100644 app/src/main/java/exh/search/StringTextComponent.kt
 create mode 100644 app/src/main/java/exh/search/Text.kt
 create mode 100644 app/src/main/java/exh/search/TextComponent.kt

Review notes (free text in this position is skipped when the patch is applied):
 * SearchEngine.kt and Text.kt were changed in review, so the blob hashes on
   their "index" lines are stale. Regenerate the patch before a 3-way apply.
 * Generic type arguments had been stripped from the patch text and were
   restored. The List<Filter> context line in EHentaiMetadata.kt and the
   indentation of all context lines are best-effort reconstructions and must
   be verified against the target tree.

diff --git a/app/build.gradle b/app/build.gradle
index b30fd1cdb..3bf1b1a3b 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -200,6 +200,9 @@ dependencies {
     //SnappyDB (EH)
     compile 'io.paperdb:paperdb:2.0'
 
+    //JVE (Regex) (EH)
+    compile 'ru.lanwen.verbalregex:java-verbal-expressions:1.4'
+
     // Tests
     //Paper DB screws up tests
     /*testCompile 'junit:junit:4.12'
diff --git a/app/src/main/java/eu/kanade/tachiyomi/data/source/online/all/EHentaiMetadata.kt b/app/src/main/java/eu/kanade/tachiyomi/data/source/online/all/EHentaiMetadata.kt
index 0564882a3..fc2eae524 100644
--- a/app/src/main/java/eu/kanade/tachiyomi/data/source/online/all/EHentaiMetadata.kt
+++ b/app/src/main/java/eu/kanade/tachiyomi/data/source/online/all/EHentaiMetadata.kt
@@ -9,6 +9,7 @@ import eu.kanade.tachiyomi.data.source.online.OnlineSource
 import exh.metadata.MetadataHelper
 import exh.metadata.copyTo
 import exh.metadata.models.ExGalleryMetadata
+import exh.search.SearchEngine
 import okhttp3.Response
 import rx.Observable
 
@@ -24,6 +25,8 @@ class EHentaiMetadata(override val id: Int,
 
     val internalEx = EHentai(id - 2, exh, context)
 
+    val searchEngine = SearchEngine()
+
     override val baseUrl: String
         get() = throw UnsupportedOperationException()
     override val lang: String
@@ -105,8 +108,11 @@ class EHentaiMetadata(override val id: Int,
 
     override fun fetchSearchManga(page: MangasPage, query: String, filters: List<Filter>) =
             Observable.fromCallable {
+                val parsed = searchEngine.parseQuery(query)
                 page.mangas.addAll(sortedByTimeGalleries().filter { manga ->
                     filters.isEmpty() || filters.filter { it.id == manga.genre }.isNotEmpty()
+                }.filter {
+                    searchEngine.matches(it, parsed)
                 }.mapToManga())
                 page
             }!!
diff --git a/app/src/main/java/exh/VerbelExpressionExtensions.kt b/app/src/main/java/exh/VerbelExpressionExtensions.kt
new file mode 100644
index 000000000..12f060076
--- /dev/null
+++ b/app/src/main/java/exh/VerbelExpressionExtensions.kt
@@ -0,0 +1,5 @@
+package exh
+
+import ru.lanwen.verbalregex.VerbalExpression
+
+fun VerbalExpression.Builder.anyChar() = add(".")!!
diff --git a/app/src/main/java/exh/search/MultiWildcard.kt b/app/src/main/java/exh/search/MultiWildcard.kt
new file mode 100644
index 000000000..b5cecbe57
--- /dev/null
+++ b/app/src/main/java/exh/search/MultiWildcard.kt
@@ -0,0 +1,3 @@
+package exh.search
+
+class MultiWildcard : TextComponent()
diff --git a/app/src/main/java/exh/search/Namespace.kt b/app/src/main/java/exh/search/Namespace.kt
new file mode 100644
index 000000000..8c939ac96
--- /dev/null
+++ b/app/src/main/java/exh/search/Namespace.kt
@@ -0,0 +1,4 @@
+package exh.search
+
+class Namespace(var namespace: Text,
+                var tag: Text? = null) : QueryComponent()
diff --git a/app/src/main/java/exh/search/QueryComponent.kt b/app/src/main/java/exh/search/QueryComponent.kt
new file mode 100644
index 000000000..bfc34eea0
--- /dev/null
+++ b/app/src/main/java/exh/search/QueryComponent.kt
@@ -0,0 +1,6 @@
+package exh.search
+
+open class QueryComponent {
+    var excluded = false
+    var exact = false
+}
\ No newline at end of file
diff --git a/app/src/main/java/exh/search/SearchEngine.kt b/app/src/main/java/exh/search/SearchEngine.kt
new file mode 100644
index 000000000..cf7eaecf1
--- /dev/null
+++ b/app/src/main/java/exh/search/SearchEngine.kt
@@ -0,0 +1,149 @@
+package exh.search
+
+import exh.metadata.models.ExGalleryMetadata
+import exh.metadata.models.Tag
+import ru.lanwen.verbalregex.VerbalExpression
+import java.util.*
+
+/**
+ * Parses search queries into [QueryComponent]s and tests gallery metadata
+ * against them.
+ */
+class SearchEngine {
+    //TODO Namespace alias
+    /**
+     * Returns true if [metadata] satisfies every component of [query].
+     *
+     * A [Text] component is searched for in the title, the alternate title
+     * and all tags. A [Namespace] component is only searched for in the tags
+     * of that namespace. An excluded component must not be found.
+     */
+    fun matches(metadata: ExGalleryMetadata, query: List<QueryComponent>): Boolean {
+
+        //A normal term needs a matching tag, an excluded term forbids one
+        fun matchTagList(tags: List<Tag>,
+                         text: Text,
+                         exact: Boolean,
+                         excluded: Boolean): Boolean {
+            val builder = text.asRegex()
+            //Non-exact terms only have to match the beginning of a tag
+            val tagMatcher: VerbalExpression = if(exact)
+                builder.build()
+            else
+                builder.anything().build()
+            val matched = tags.any { tagMatcher.testExact(it.name) }
+            return matched != excluded
+        }
+
+        for(component in query) {
+            if(component is Text) {
+                val built = component.asRegex().build()
+                //Match title
+                if (built.test(metadata.title?.toLowerCase())
+                        || built.test(metadata.altTitle?.toLowerCase())) {
+                    //A title hit rejects the gallery if the term is excluded
+                    if(component.excluded) return false
+                    continue
+                }
+                //Match tags
+                if(!matchTagList(metadata.tags.entries.flatMap { it.value },
+                        component,
+                        component.exact,
+                        component.excluded))
+                    return false
+            } else if(component is Namespace) {
+                //A namespace without a tag has nothing to match against
+                val tag = component.tag ?: continue
+                //Match namespace
+                val wanted = component.namespace.rawTextOnly()
+                val ns = metadata.tags.entries.filter {
+                    it.key == wanted
+                }.flatMap { it.value }
+                //Match tags, the modifiers are stored on the namespace
+                if(!matchTagList(ns, tag, component.exact, component.excluded))
+                    return false
+            }
+        }
+        return true
+    }
+
+    /**
+     * Splits [query] into its components.
+     *
+     * Terms are separated by spaces that are not inside double quotes.
+     * `?` and `_` match a single character, `*` and `%` match any number of
+     * characters, a leading `-` excludes the term, `$` makes it exact and
+     * `:` separates a namespace from its tag. The query is lower-cased
+     * before it is parsed.
+     */
+    fun parseQuery(query: String): List<QueryComponent> {
+        val res = mutableListOf<QueryComponent>()
+
+        var inQuotes = false
+        val queuedRawText = StringBuilder()
+        val queuedText = mutableListOf<TextComponent>()
+        var namespace: Namespace? = null
+
+        var nextIsExcluded = false
+        var nextIsExact = false
+
+        fun flushText() {
+            if(queuedRawText.isNotEmpty()) {
+                queuedText += StringTextComponent(queuedRawText.toString())
+                queuedRawText.setLength(0)
+            }
+        }
+
+        fun flushToText() = Text().apply {
+            components += queuedText
+            queuedText.clear()
+        }
+
+        //True while nothing has been read for the current term
+        fun atTermStart() = queuedRawText.isEmpty() &&
+                queuedText.isEmpty() &&
+                namespace == null
+
+        fun flushAll() {
+            flushText()
+            if (queuedText.isNotEmpty()) {
+                val component = namespace?.apply {
+                    tag = flushToText()
+                } ?: flushToText()
+                component.excluded = nextIsExcluded
+                component.exact = nextIsExact
+                res += component
+            }
+            //Modifiers and namespaces only apply to the term they belong to
+            namespace = null
+            nextIsExcluded = false
+            nextIsExact = false
+        }
+
+        for(char in query.toLowerCase()) {
+            when {
+                char == '"' -> inQuotes = !inQuotes
+                char == '?' || char == '_' -> {
+                    flushText()
+                    queuedText.add(SingleWildcard())
+                }
+                char == '*' || char == '%' -> {
+                    flushText()
+                    queuedText.add(MultiWildcard())
+                }
+                //A dash inside a term is plain text, only a leading one excludes
+                char == '-' && atTermStart() -> nextIsExcluded = true
+                char == '$' -> nextIsExact = true
+                char == ':' -> {
+                    flushText()
+                    namespace = Namespace(flushToText(), null)
+                }
+                char == ' ' && !inQuotes -> flushAll()
+                else -> queuedRawText.append(char)
+            }
+        }
+        flushAll()
+
+        return res
+    }
+}
diff --git a/app/src/main/java/exh/search/SingleWildcard.kt b/app/src/main/java/exh/search/SingleWildcard.kt
new file mode 100644
index 000000000..503d751e1
--- /dev/null
+++ b/app/src/main/java/exh/search/SingleWildcard.kt
@@ -0,0 +1,3 @@
+package exh.search
+
+class SingleWildcard : TextComponent()
diff --git a/app/src/main/java/exh/search/StringTextComponent.kt b/app/src/main/java/exh/search/StringTextComponent.kt
new file mode 100644
index 000000000..736f8c225
--- /dev/null
+++ b/app/src/main/java/exh/search/StringTextComponent.kt
@@ -0,0 +1,3 @@
+package exh.search
+
+class StringTextComponent(val value: String) : TextComponent()
diff --git a/app/src/main/java/exh/search/Text.kt b/app/src/main/java/exh/search/Text.kt
new file mode 100644
index 000000000..63acb9e83
--- /dev/null
+++ b/app/src/main/java/exh/search/Text.kt
@@ -0,0 +1,35 @@
+package exh.search
+
+import exh.anyChar
+import ru.lanwen.verbalregex.VerbalExpression
+
+/**
+ * A piece of query text made of literal strings and wildcards.
+ */
+class Text: QueryComponent() {
+    val components = mutableListOf<TextComponent>()
+
+    /**
+     * Builds a regex builder that matches the components in order.
+     */
+    fun asRegex(): VerbalExpression.Builder {
+        val builder = VerbalExpression.regex()
+        for(component in components) {
+            when(component) {
+                is StringTextComponent -> builder.then(component.value)
+                is SingleWildcard -> builder.anyChar()
+                //zeroOrMore() would only repeat the previous component
+                is MultiWildcard -> builder.anything()
+            }
+        }
+        return builder
+    }
+
+    /**
+     * Returns the literal parts of this text joined together, wildcards are
+     * left out.
+     */
+    fun rawTextOnly() = components
+            .filterIsInstance<StringTextComponent>()
+            .joinToString(separator = "") { it.value }
+}
diff --git a/app/src/main/java/exh/search/TextComponent.kt b/app/src/main/java/exh/search/TextComponent.kt
new file mode 100644
index 000000000..9b50051f5
--- /dev/null
+++ b/app/src/main/java/exh/search/TextComponent.kt
@@ -0,0 +1,3 @@
+package exh.search
+
+open class TextComponent