2016-06-03 10:13:43 +00:00
|
|
|
import re
|
2020-06-05 22:03:37 +00:00
|
|
|
|
2016-06-03 10:13:43 +00:00
|
|
|
from szurubooru import errors
|
|
|
|
from szurubooru.search import criteria, tokens
|
2017-04-24 19:51:49 +00:00
|
|
|
from szurubooru.search.configs import util
|
2020-06-05 22:03:37 +00:00
|
|
|
from szurubooru.search.query import SearchQuery
|
2016-06-03 10:13:43 +00:00
|
|
|
|
2016-08-14 12:22:53 +00:00
|
|
|
|
2017-02-04 00:08:12 +00:00
|
|
|
def _create_criterion(
    original_value: str, value: str
) -> criteria.BaseCriterion:
    """Build a search criterion from a raw token value.

    An unescaped comma produces a compound (array) criterion, an
    unescaped ``..`` produces a ranged criterion, and anything else
    produces a plain criterion.

    Args:
        original_value: the value exactly as the user typed it.
        value: the (possibly rewritten) value to analyze.

    Raises:
        errors.SearchError: if a compound value contains an empty term,
            or a ranged value has neither a low nor a high bound.
    """
    # Unescaped comma -> compound value such as "a,b,c".
    if re.search(r"(?<!\\),", value):
        values = re.split(r"(?<!\\),", value)
        if any(not term.strip() for term in values):
            raise errors.SearchError("Empty compound value")
        return criteria.ArrayCriterion(original_value, values)
    # Unescaped ".." -> ranged value such as "1..10", "1.." or "..10".
    if re.search(r"(?<!\\)\.(?<!\\)\.", value):
        # maxsplit passed as keyword: the positional form is deprecated
        # (Python 3.13+) and the keyword form works on all 3.x versions.
        low, high = re.split(r"(?<!\\)\.(?<!\\)\.", value, maxsplit=1)
        if not low and not high:
            raise errors.SearchError("Empty ranged value")
        return criteria.RangedCriterion(original_value, low, high)
    return criteria.PlainCriterion(original_value, value)
|
|
|
|
|
2016-08-14 12:22:53 +00:00
|
|
|
|
2017-02-04 00:08:12 +00:00
|
|
|
def _parse_anonymous(value: str, negated: bool) -> tokens.AnonymousToken:
    """Turn a bare (key-less) search term into an anonymous token."""
    return tokens.AnonymousToken(_create_criterion(value, value), negated)
|
|
|
|
|
2016-08-14 12:22:53 +00:00
|
|
|
|
2017-02-04 00:08:12 +00:00
|
|
|
def _parse_named(key: str, value: str, negated: bool) -> tokens.NamedToken:
    """Turn a ``key:value`` search term into a named token.

    ``foo-min:X`` and ``foo-max:X`` are shorthand for the ranged forms
    ``foo:X..`` and ``foo:..X`` respectively; the criterion still records
    the value exactly as the user typed it.
    """
    original_value = value
    if key.endswith("-min"):
        key, value = key[:-4], value + ".."
    elif key.endswith("-max"):
        key, value = key[:-4], ".." + value
    criterion = _create_criterion(original_value, value)
    return tokens.NamedToken(key, criterion, negated)
|
|
|
|
|
2016-08-14 12:22:53 +00:00
|
|
|
|
2017-02-04 00:08:12 +00:00
|
|
|
def _parse_special(value: str, negated: bool) -> tokens.SpecialToken:
    """Wrap a ``special:`` search term in a special token, keeping the
    value untouched for downstream interpretation."""
    return tokens.SpecialToken(value, negated)
|
|
|
|
|
2016-08-14 12:22:53 +00:00
|
|
|
|
2017-02-04 00:08:12 +00:00
|
|
|
def _parse_sort(value: str, negated: bool) -> tokens.SortToken:
    """Turn a ``sort:field[,direction]`` search term into a sort token.

    The optional direction is ``asc`` or ``desc``; omitting it (or
    leaving it empty) selects the default order.  Negating the token
    flips the effective direction.

    Raises:
        errors.SearchError: if the value contains more than one comma or
            names an unknown direction.
    """
    # Count once instead of twice; the comma separates field from order.
    comma_count = value.count(",")
    if comma_count == 0:
        order_str = None
    elif comma_count == 1:
        value, order_str = value.split(",")
    else:
        raise errors.SearchError("Too many commas in sort style token.")
    try:
        order = {
            "asc": tokens.SortToken.SORT_ASC,
            "desc": tokens.SortToken.SORT_DESC,
            "": tokens.SortToken.SORT_DEFAULT,
            None: tokens.SortToken.SORT_DEFAULT,
        }[order_str]
    except KeyError:
        # `from None`: the KeyError is an implementation detail and would
        # only clutter the traceback shown to the API user.
        raise errors.SearchError(
            "Unknown search direction: %r." % order_str
        ) from None
    if negated:
        # Negation flips the requested direction (and toggles between the
        # default and negated-default orders).
        order = {
            tokens.SortToken.SORT_ASC: tokens.SortToken.SORT_DESC,
            tokens.SortToken.SORT_DESC: tokens.SortToken.SORT_ASC,
            tokens.SortToken.SORT_DEFAULT: tokens.SortToken.SORT_NEGATED_DEFAULT,  # noqa: E501
            tokens.SortToken.SORT_NEGATED_DEFAULT: tokens.SortToken.SORT_DEFAULT,  # noqa: E501
        }[order]
    return tokens.SortToken(value, order)
|
|
|
|
|
2016-06-03 10:13:43 +00:00
|
|
|
|
2016-10-22 12:43:52 +00:00
|
|
|
class Parser:
    """Parses free-form search query strings into SearchQuery objects."""

    def parse(self, query_text: str) -> SearchQuery:
        """Split *query_text* on whitespace and classify each chunk as a
        sort, special, named or anonymous token.

        A leading ``-`` negates a chunk; the first unescaped ``:``
        separates a key from its value.  ``query_text`` may be ``None``
        or empty, yielding an empty query.

        Raises:
            errors.SearchError: on an empty negated token or a malformed
                sort/compound/ranged value.
        """
        query = SearchQuery()
        # Queries are case-insensitive: normalize once up front.
        for chunk in re.split(r"\s+", (query_text or "").lower()):
            if not chunk:
                continue
            negated = False
            if chunk[0] == "-":
                chunk = chunk[1:]
                negated = True
                if not chunk:
                    raise errors.SearchError("Empty negated token.")
            # First unescaped colon splits the key from the value.
            match = re.match(r"^(.*?)(?<!\\):(.*)$", chunk)
            if match:
                # groups() is already a tuple; unpack it directly.
                key, value = match.groups()
                key = util.unescape(key)
                if key == "sort":
                    query.sort_tokens.append(_parse_sort(value, negated))
                elif key == "special":
                    query.special_tokens.append(_parse_special(value, negated))
                else:
                    query.named_tokens.append(
                        _parse_named(key, value, negated)
                    )
            else:
                query.anonymous_tokens.append(_parse_anonymous(chunk, negated))
        return query
|