Implement search engine.

This commit is contained in:
NerdNumber9 2017-01-03 16:57:51 -05:00
parent c836f52460
commit 02c70d868d
11 changed files with 184 additions and 0 deletions

View File

@ -200,6 +200,9 @@ dependencies {
//SnappyDB (EH)
compile 'io.paperdb:paperdb:2.0'
//JVE (Regex) (EH)
compile 'ru.lanwen.verbalregex:java-verbal-expressions:1.4'
// Tests
//Paper DB screws up tests
/*testCompile 'junit:junit:4.12'

View File

@ -9,6 +9,7 @@ import eu.kanade.tachiyomi.data.source.online.OnlineSource
import exh.metadata.MetadataHelper
import exh.metadata.copyTo
import exh.metadata.models.ExGalleryMetadata
import exh.search.SearchEngine
import okhttp3.Response
import rx.Observable
@ -24,6 +25,8 @@ class EHentaiMetadata(override val id: Int,
// EHentai instance built from this source's id minus 2 and the same exh/context values.
val internalEx = EHentai(id - 2, exh, context)
// Engine used by fetchSearchManga to parse the query string and test galleries against it.
val searchEngine = SearchEngine()
// Reading baseUrl always throws UnsupportedOperationException.
override val baseUrl: String
get() = throw UnsupportedOperationException()
override val lang: String
@ -105,8 +108,11 @@ class EHentaiMetadata(override val id: Int,
/**
 * Fills [page] with the galleries returned by `sortedByTimeGalleries()` that
 * pass both the genre [filters] and the parsed search [query].
 *
 * The work runs inside [Observable.fromCallable], so nothing is evaluated
 * until the returned observable is subscribed to. The observable emits the
 * same [page] instance after its `mangas` list has been appended to.
 *
 * @param page page object whose `mangas` list receives the results
 * @param query raw search string, parsed with `searchEngine.parseQuery`
 * @param filters genre filters; an empty list lets every gallery through
 */
override fun fetchSearchManga(page: MangasPage, query: String, filters: List<Filter>)
        = Observable.fromCallable {
    val parsedQuery = searchEngine.parseQuery(query)

    // Genre filter: keep a gallery when no filter is active or one filter id equals its genre.
    val genreFiltered = sortedByTimeGalleries().filter { gallery ->
        filters.isEmpty() || filters.any { it.id == gallery.genre }
    }

    // Text/tag filter driven by the parsed query.
    val searchFiltered = genreFiltered.filter { gallery ->
        searchEngine.matches(gallery, parsedQuery)
    }

    page.mangas.addAll(searchFiltered.mapToManga())
    page
}!!

View File

@ -0,0 +1,5 @@
package exh
import ru.lanwen.verbalregex.VerbalExpression
/**
 * Appends the raw regex token `.` to this builder via `add(".")`.
 *
 * @return the builder returned by `add`, asserted non-null so calls can be chained
 */
fun VerbalExpression.Builder.anyChar(): VerbalExpression.Builder {
    return add(".")!!
}

View File

@ -0,0 +1,3 @@
package exh.search
/**
 * Marker text component that SearchEngine.parseQuery emits for a `*` or `%`
 * character in the query. It carries no data of its own.
 */
class MultiWildcard : TextComponent()

View File

@ -0,0 +1,4 @@
package exh.search
/**
 * Query component for a `namespace:tag` term.
 *
 * SearchEngine.parseQuery creates it when it meets a `:`; [namespace] is the
 * text queued before the colon. [tag] starts as null and is filled with the
 * text queued after the colon when the term is flushed.
 */
class Namespace(var namespace: Text,
var tag: Text? = null) : QueryComponent()

View File

@ -0,0 +1,6 @@
package exh.search
/**
 * Base class of every element produced by SearchEngine.parseQuery.
 * Both flags default to false and are assigned by the parser when a term is flushed.
 */
open class QueryComponent {
// Set for terms the query marked with `-`.
var excluded = false
// Set for terms the query marked with `$`.
var exact = false
}

View File

@ -0,0 +1,124 @@
package exh.search
import exh.metadata.models.ExGalleryMetadata
import exh.metadata.models.Tag
import ru.lanwen.verbalregex.VerbalExpression
import java.util.*
/**
 * Parses free-text search queries into [QueryComponent]s and tests gallery
 * metadata against them.
 */
class SearchEngine {
    //TODO Namespace alias

    /**
     * Returns `true` when [metadata] satisfies every component of [query].
     *
     * - A [Text] component is "found" when its regex occurs in the lower-cased
     *   title or alternative title, or when it matches any tag of the gallery.
     * - A [Namespace] component is "found" when its tag text matches a tag
     *   stored under the map key equal to the namespace's raw text.
     * - A found component passes unless it is excluded; an excluded component
     *   passes only when it is NOT found.
     * - Components of any other type impose no constraint.
     *
     * @param metadata gallery to test
     * @param query components as returned by [parseQuery]
     */
    fun matches(metadata: ExGalleryMetadata, query: List<QueryComponent>): Boolean =
            query.all { component ->
                when (component) {
                    is Text -> matchesText(metadata, component)
                    is Namespace -> matchesNamespace(metadata, component)
                    else -> true
                }
            }

    /** Evaluates a plain text term against the titles and all tags of [metadata]. */
    private fun matchesText(metadata: ExGalleryMetadata, component: Text): Boolean {
        val titleRegex = component.asRegex().build()
        // VerbalExpression.test returns false for null input, so missing titles simply do not match.
        val found = titleRegex.test(metadata.title?.toLowerCase())
                || titleRegex.test(metadata.altTitle?.toLowerCase())
                || anyTagMatches(metadata.tags.entries.flatMap { it.value },
                        component,
                        component.exact)
        // Exclusion inverts the outcome: an excluded term must be absent.
        return found != component.excluded
    }

    /** Evaluates a `namespace:tag` term against the tags stored under that namespace. */
    private fun matchesNamespace(metadata: ExGalleryMetadata, component: Namespace): Boolean {
        val wantedNamespace = component.namespace.rawTextOnly()
        val namespaceTags = metadata.tags.entries
                .filter { it.key == wantedNamespace }
                .flatMap { it.value }

        val tag = component.tag
        // The parser stores the flags on the Namespace itself; flags placed
        // directly on the tag text are still honoured for hand-built queries.
        val excluded = component.excluded || (tag?.excluded ?: false)
        val found = if (tag == null) {
            // No tag text given: the namespace only has to contain something.
            namespaceTags.isNotEmpty()
        } else {
            anyTagMatches(namespaceTags, tag, component.exact || tag.exact)
        }
        return found != excluded
    }

    /**
     * Returns `true` when at least one tag name in [tags] matches [text].
     *
     * With [exact] the whole tag name must match the regex; otherwise the
     * regex only has to match the beginning of the tag name (anything may follow).
     */
    private fun anyTagMatches(tags: List<Tag>, text: Text, exact: Boolean): Boolean {
        val builder = text.asRegex()
        val tagMatcher = if (exact) builder.build() else builder.anything().build()
        return tags.any { tagMatcher.testExact(it.name) }
    }

    /**
     * Splits [query] into components. The query is lower-cased first.
     *
     * Recognised syntax:
     * - a space outside double quotes ends the current term; `"` toggles quoting
     * - `?` / `_` add a [SingleWildcard], `*` / `%` add a [MultiWildcard]
     * - `-` at the very start of a term marks it excluded; a `-` after some
     *   text of the term has been read is kept as a literal character
     * - `$` anywhere in a term marks it exact
     * - `:` turns the text read so far into the namespace of a [Namespace] term
     *
     * Exclusion, exactness and namespace apply to a single term only and are
     * reset whenever a term ends.
     *
     * @return the parsed components in query order; terms without any text are dropped
     */
    fun parseQuery(query: String): List<QueryComponent> {
        val res = mutableListOf<QueryComponent>()

        var inQuotes = false
        val queuedRawText = StringBuilder()
        val queuedText = mutableListOf<TextComponent>()
        var namespace: Namespace? = null

        var nextIsExcluded = false
        var nextIsExact = false

        // Moves pending literal characters into the component queue.
        fun flushText() {
            if (queuedRawText.isNotEmpty()) {
                queuedText += StringTextComponent(queuedRawText.toString())
                queuedRawText.setLength(0)
            }
        }

        // Wraps the queued components into a Text and empties the queue.
        fun flushToText() = Text().apply {
            components += queuedText
            queuedText.clear()
        }

        // Finishes the current term and clears all per-term state.
        fun flushAll() {
            flushText()
            if (queuedText.isNotEmpty()) {
                val component = namespace?.apply {
                    tag = flushToText()
                } ?: flushToText()
                component.excluded = nextIsExcluded
                component.exact = nextIsExact
                res += component
            }
            // Without this reset the flags and the Namespace instance would
            // leak into every following term.
            namespace = null
            nextIsExcluded = false
            nextIsExact = false
        }

        for (char in query.toLowerCase()) {
            when {
                char == '"' -> {
                    inQuotes = !inQuotes
                }
                char == '?' || char == '_' -> {
                    flushText()
                    queuedText.add(SingleWildcard())
                }
                char == '*' || char == '%' -> {
                    flushText()
                    queuedText.add(MultiWildcard())
                }
                // Only a leading '-' is an exclusion marker; "sci-fi" keeps its hyphen.
                char == '-' && queuedRawText.isEmpty() && queuedText.isEmpty() -> {
                    nextIsExcluded = true
                }
                char == '$' -> {
                    nextIsExact = true
                }
                char == ':' -> {
                    flushText()
                    namespace = Namespace(flushToText(), null)
                }
                char == ' ' && !inQuotes -> {
                    flushAll()
                }
                else -> {
                    queuedRawText.append(char)
                }
            }
        }
        flushAll()

        return res
    }
}

View File

@ -0,0 +1,3 @@
package exh.search
/**
 * Marker text component that SearchEngine.parseQuery emits for a `?` or `_`
 * character in the query. It carries no data of its own.
 */
class SingleWildcard : TextComponent()

View File

@ -0,0 +1,3 @@
package exh.search
/**
 * Text component holding a run of literal characters from the query.
 * [value] is the text exactly as the parser queued it (the parser lower-cases the query first).
 */
class StringTextComponent(val value: String) : TextComponent()

View File

@ -0,0 +1,24 @@
package exh.search
import exh.anyChar
import ru.lanwen.verbalregex.VerbalExpression
/**
 * A search term made of literal text and wildcard pieces, in the order they
 * appeared in the query.
 */
class Text : QueryComponent() {
    /** Ordered pieces of this term; filled by the query parser. */
    val components = mutableListOf<TextComponent>()

    /**
     * Translates [components] into a regex builder.
     *
     * - [StringTextComponent] becomes its literal value (added with `then`)
     * - [SingleWildcard] becomes a single `.`
     * - [MultiWildcard] becomes `anything()`, i.e. any run of characters.
     *   A bare `zeroOrMore()` would only attach a `*` quantifier to the
     *   preceding group (and is invalid at the start of a pattern).
     *
     * @return a fresh, unbuilt builder; callers may append to it before calling `build()`
     */
    fun asRegex(): VerbalExpression.Builder {
        val builder = VerbalExpression.regex()
        for (component in components) {
            when (component) {
                is StringTextComponent -> builder.then(component.value)
                is SingleWildcard -> builder.anyChar()
                is MultiWildcard -> builder.anything()
            }
        }
        return builder
    }

    /**
     * Concatenates the values of all literal pieces, skipping wildcards.
     *
     * Joins `value` explicitly: [StringTextComponent] does not override
     * `toString()`, so joining the objects themselves would not yield the text.
     */
    fun rawTextOnly(): String = components
            .filterIsInstance<StringTextComponent>()
            .joinToString(separator = "") { it.value }
}

View File

@ -0,0 +1,3 @@
package exh.search
/**
 * Base type of the pieces stored in Text.components; extended by
 * StringTextComponent, SingleWildcard and MultiWildcard.
 */
open class TextComponent