package exh.hitomi

import eu.kanade.tachiyomi.network.GET
import eu.kanade.tachiyomi.network.asObservable
import eu.kanade.tachiyomi.network.asObservableSuccess
import exh.metadata.metadata.HitomiSearchMetadata.Companion.LTN_BASE_URL
import java.security.MessageDigest
import okhttp3.Headers
import okhttp3.OkHttpClient
import okhttp3.Request
import org.vepta.vdm.ByteCursor
import rx.Observable
import rx.Single

/** First bytes of the SHA-256 digest of a search term; used as the lookup key in the galleries index. */
private typealias HashedTerm = ByteArray

/** Location (byte [offset] and byte [length]) of a gallery-id list inside the galleries data file. */
private data class DataPair(val offset: Long, val length: Int)

/**
 * One decoded node of the remote index tree.
 *
 * [keys] and [datas] are parallel lists as read from the node payload; [subnodeAddresses] holds the
 * byte addresses of child nodes inside the index file, where `0` means "no child".
 */
private data class Node(
    val keys: List<ByteArray>,
    val datas: List<DataPair>,
    val subnodeAddresses: List<Long>
)

/**
 * Kotlin port of the hitomi.la search algorithm
 * @author NerdNumber9
 */
class HitomiNozomi(
    private val client: OkHttpClient,
    private val tagIndexVersion: Long,
    private val galleriesIndexVersion: Long
) {
    /**
     * Resolves a single search term to the list of matching gallery ids.
     *
     * Underscores in [query] are treated as spaces. A term of the form `namespace:value` is served
     * from the matching nozomi file; any other term is hashed and looked up in the galleries index.
     *
     * @param query the raw search term.
     * @return a [Single] emitting the matching gallery ids, or an empty list when nothing matches.
     */
    fun getGalleryIdsForQuery(query: String): Single<List<Int>> {
        val replacedQuery = query.replace('_', ' ')

        if (':' in replacedQuery) {
            // The ':' check above guarantees at least two parts.
            val sides = replacedQuery.split(':')
            val namespace = sides[0]
            val value = sides[1]

            return when (namespace) {
                // Gendered tags live under the "tag" area and keep their "female:"/"male:" prefix.
                "female", "male" -> getGalleryIdsFromNozomi("tag", replacedQuery, "all")
                // Language filters select a per-language variant of the top-level index.
                "language" -> getGalleryIdsFromNozomi(null, "index", value)
                else -> getGalleryIdsFromNozomi(namespace, value, "all")
            }
        }

        // Hash the normalised term (underscores already replaced), not the raw input.
        val key = hashTerm(replacedQuery)
        val field = "galleries"

        // BSearch tolerates a null node and getGalleryIdsFromData tolerates a null data pair,
        // so every "not found" path ends in an empty list rather than a null emission.
        return getNodeAtAddress(field, 0)
            .flatMap { rootNode -> BSearch(field, key, rootNode) }
            .flatMap { data -> getGalleryIdsFromData(data) }
    }

    /**
     * Downloads and decodes the gallery-id list described by [data].
     *
     * @return the decoded ids, or an empty list when [data] is null, has an implausible length,
     * the request fails, or the payload does not pass validation.
     */
    private fun getGalleryIdsFromData(data: DataPair?): Single<List<Int>> {
        if (data == null) {
            return Single.just(emptyList())
        }

        val (offset, length) = data
        if (length > MAX_DATA_LENGTH || length <= 0) {
            return Single.just(emptyList())
        }

        val url = "$LTN_BASE_URL/$GALLERIES_INDEX_DIR/galleries.$galleriesIndexVersion.data"
        return client.newCall(rangedGet(url, offset, offset + length - 1))
            .asObservable()
            .map { it.body?.bytes() ?: ByteArray(0) }
            // Best effort: a failed download is treated like an empty payload.
            .onErrorReturn { ByteArray(0) }
            .map { payload -> parseGalleryIds(payload) }
            .toSingle()
    }

    /**
     * Parses a gallery-id payload: a 4-byte count followed by that many 4-byte ids.
     *
     * @return the ids, or an empty list when the payload is too short, the count is out of range,
     * or the payload size does not match the count.
     */
    private fun parseGalleryIds(payload: ByteArray): List<Int> {
        // Fewer than four bytes cannot even hold the count (also covers the empty payload).
        if (payload.size < 4) {
            return emptyList()
        }

        val view = ByteCursor(payload)
        val numberOfGalleryIds = view.nextInt()
        // Range-check the count first so the size computation below cannot overflow.
        if (numberOfGalleryIds > MAX_GALLERY_IDS || numberOfGalleryIds <= 0) {
            return emptyList()
        }

        val expectedLength = numberOfGalleryIds * 4 + 4
        if (payload.size != expectedLength) {
            return emptyList()
        }

        return (1..numberOfGalleryIds).map { view.nextInt() }
    }

    /**
     * Searches the index tree rooted at [node] for [key], fetching child nodes on demand.
     *
     * @param field index name, forwarded to [getNodeAtAddress] when descending.
     * @param key the hashed term to look for.
     * @param node the node to start from; null is treated as "not found".
     * @return a [Single] emitting the data pair stored for [key], or null when it is not present.
     */
    private fun BSearch(field: String, key: ByteArray, node: Node?): Single<DataPair?> {
        // Unsigned byte-wise comparison over the common prefix; arrays that agree on the
        // common prefix compare as equal regardless of length.
        fun compareByteArrays(dv1: ByteArray, dv2: ByteArray): Int {
            val top = dv1.size.coerceAtMost(dv2.size)
            for (i in 0 until top) {
                val left = dv1[i].toInt() and 0xFF
                val right = dv2[i].toInt() and 0xFF
                if (left != right) {
                    return if (left < right) -1 else 1
                }
            }
            return 0
        }

        // Returns (found, index): index of the first node key that is >= key, or keys.size
        // when every node key is smaller.
        fun locateKey(key: ByteArray, node: Node): Pair<Boolean, Int> {
            var cmpResult = -1
            var index = 0
            for (nodeKey in node.keys) {
                cmpResult = compareByteArrays(key, nodeKey)
                if (cmpResult <= 0) break
                index++
            }
            return (cmpResult == 0) to index
        }

        // A node is a leaf when none of its subnode addresses is set.
        fun isLeaf(node: Node): Boolean = node.subnodeAddresses.all { it == 0L }

        if (node == null || node.keys.isEmpty()) {
            return Single.just(null)
        }

        val (there, where) = locateKey(key, node)
        if (there) {
            // getOrNull: a malformed node with fewer datas than keys counts as "not found".
            return Single.just(node.datas.getOrNull(where))
        }
        if (isLeaf(node)) {
            return Single.just(null)
        }

        // Address 0 means "no child" (see isLeaf); following it would re-read the root node
        // and could recurse forever, so stop here instead.
        val childAddress = node.subnodeAddresses.getOrElse(where) { 0L }
        if (childAddress == 0L) {
            return Single.just(null)
        }

        return getNodeAtAddress(field, childAddress).flatMap { childNode ->
            BSearch(field, key, childNode)
        }
    }

    /**
     * Decodes a raw node payload.
     *
     * Layout as read here: key count, then for each key a length-prefixed byte string; data count,
     * then for each entry an offset (Long) and a length (Int); finally `B + 1` subnode addresses (Long).
     */
    private fun decodeNode(data: ByteArray): Node {
        val view = ByteCursor(data)

        val numberOfKeys = view.nextInt()
        val keys = (1..numberOfKeys).map {
            val keySize = view.nextInt()
            view.next(keySize)
        }

        val numberOfDatas = view.nextInt()
        val datas = (1..numberOfDatas).map {
            val offset = view.nextLong()
            val length = view.nextInt()
            DataPair(offset, length)
        }

        val numberOfSubnodeAddresses = B + 1
        val subnodeAddresses = (1..numberOfSubnodeAddresses).map { view.nextLong() }

        return Node(keys, datas, subnodeAddresses)
    }

    /**
     * Fetches and decodes the index node stored at byte [address].
     *
     * @param field `"galleries"` selects the galleries index; any other value selects
     * `<field>.<tagIndexVersion>.index` under the tag index directory.
     * @return a [Single] emitting the decoded node, or null when the request fails or returns no bytes.
     */
    private fun getNodeAtAddress(field: String, address: Long): Single<Node?> {
        val url = if (field == "galleries") {
            "$LTN_BASE_URL/$GALLERIES_INDEX_DIR/galleries.$galleriesIndexVersion.index"
        } else {
            "$LTN_BASE_URL/$INDEX_DIR/$field.$tagIndexVersion.index"
        }

        return client.newCall(rangedGet(url, address, address + MAX_NODE_SIZE - 1))
            .asObservableSuccess()
            .map { it.body?.bytes() ?: ByteArray(0) }
            // Best effort: a failed download is treated like an empty payload.
            .onErrorReturn { ByteArray(0) }
            .map { nodeData -> if (nodeData.isNotEmpty()) decodeNode(nodeData) else null }
            .toSingle()
    }

    /**
     * Downloads a nozomi file and decodes it as a flat sequence of 4-byte gallery ids.
     *
     * @param area optional sub-directory of the nozomi tree; null addresses the top level.
     * @param tag file name stem before the language suffix.
     * @param language language suffix of the file name.
     * @return a [Single] emitting the ids in file order; trailing bytes that do not fill a whole
     * 4-byte id are ignored.
     */
    fun getGalleryIdsFromNozomi(area: String?, tag: String, language: String): Single<List<Int>> {
        val nozomiAddress = if (area != null) {
            "$LTN_BASE_URL/$COMPRESSED_NOZOMI_PREFIX/$area/$tag-$language$NOZOMI_EXTENSION"
        } else {
            "$LTN_BASE_URL/$COMPRESSED_NOZOMI_PREFIX/$tag-$language$NOZOMI_EXTENSION"
        }

        val request = Request.Builder()
            .url(nozomiAddress)
            .build()

        return client.newCall(request)
            .asObservableSuccess()
            .map { resp ->
                val body = checkNotNull(resp.body) { "Nozomi response has no body: $nozomiAddress" }.bytes()
                val cursor = ByteCursor(body)
                (1..body.size / 4).map { cursor.nextInt() }
            }
            .toSingle()
    }

    /** Hashes [query] with SHA-256 over its UTF-8 bytes and keeps the first four digest bytes. */
    private fun hashTerm(query: String): HashedTerm =
        MessageDigest.getInstance("SHA-256")
            .digest(query.toByteArray(HASH_CHARSET))
            .copyOf(4)

    companion object {
        private const val INDEX_DIR = "tagindex"
        private const val GALLERIES_INDEX_DIR = "galleriesindex"
        private const val COMPRESSED_NOZOMI_PREFIX = "n"
        private const val NOZOMI_EXTENSION = ".nozomi"

        /** Number of bytes requested for each index node. */
        private const val MAX_NODE_SIZE = 464

        /** Each node stores `B + 1` subnode addresses. */
        private const val B = 16

        /** Upper bound (in bytes) accepted for a gallery-id data range. */
        private const val MAX_DATA_LENGTH = 100000000

        /** Upper bound accepted for the id count announced by a gallery-id payload. */
        private const val MAX_GALLERY_IDS = 10000000

        private val HASH_CHARSET = Charsets.UTF_8

        /**
         * Builds a GET request for [url] carrying an HTTP `Range` header.
         *
         * @param rangeBegin first byte requested (inclusive).
         * @param rangeEnd last byte requested (inclusive); null leaves the range open-ended.
         */
        fun rangedGet(url: String, rangeBegin: Long, rangeEnd: Long?): Request {
            val rangeHeaders = Headers.Builder()
                .add("Range", "bytes=$rangeBegin-${rangeEnd ?: ""}")
                .build()
            return GET(url, rangeHeaders)
        }

        /**
         * Fetches the current version number of the index called [name].
         *
         * The current time is appended as a query parameter, presumably to defeat caching.
         *
         * @return an [Observable] emitting the version parsed as a Long; it errors when the
         * response is unsuccessful or the body is not a number.
         */
        fun getIndexVersion(httpClient: OkHttpClient, name: String): Observable<Long> {
            return httpClient.newCall(GET("$LTN_BASE_URL/$name/version?_=${System.currentTimeMillis()}"))
                .asObservableSuccess()
                .map { resp ->
                    val text = checkNotNull(resp.body) { "Version response has no body: $name" }.string()
                    // Tolerate surrounding whitespace such as a trailing newline.
                    text.trim().toLong()
                }
        }
    }
}