gallery.accords-library.com/server/szurubooru/search/parser.py

import re
from typing import Match, List
from szurubooru import errors
from szurubooru.search import criteria, tokens
from szurubooru.search.query import SearchQuery
from szurubooru.search.configs import util


def _create_criterion(
        original_value: str, value: str) -> criteria.BaseCriterion:
    """Build the criterion that matches the syntactic shape of ``value``.

    The value is classified in this order:

    * it contains a comma not preceded by a backslash: the pieces between
      such commas become an ``ArrayCriterion``;
    * it contains ``..`` not preceded by a backslash: the text before and
      after the first such ``..`` becomes a ``RangedCriterion``;
    * otherwise the whole value becomes a ``PlainCriterion``.

    :param original_value: text handed to the criterion as its original
        value (passed through unchanged).
    :param value: text that is actually inspected and split.
    :return: the criterion instance described above.
    :raises errors.SearchError: if any compound piece is empty or only
        whitespace, or if both ends of a range are empty.
    """
    # One split doubles as the "is there an unescaped comma?" test: the
    # result has more than one element exactly when the pattern matched.
    values = re.split(r'(?<!\\),', value)
    if len(values) > 1:
        if any(not term.strip() for term in values):
            raise errors.SearchError('Empty compound value')
        return criteria.ArrayCriterion(original_value, values)

    # Only the first unescaped ".." separates the two bounds.  ``maxsplit``
    # is passed by keyword because passing it positionally to ``re.split``
    # is deprecated since Python 3.13.
    bounds = re.split(r'(?<!\\)\.(?<!\\)\.', value, maxsplit=1)
    if len(bounds) == 2:
        low, high = bounds
        if not low and not high:
            raise errors.SearchError('Empty ranged value')
        return criteria.RangedCriterion(original_value, low, high)

    return criteria.PlainCriterion(original_value, value)


def _parse_anonymous(value: str, negated: bool) -> tokens.AnonymousToken:
    """Wrap a key-less search term in an ``AnonymousToken``.

    The same text is used both as the original value and as the value to
    interpret when the criterion is created.

    :param value: the search term text.
    :param negated: negation flag forwarded to the token.
    :return: the token holding the criterion built from ``value``.
    """
    return tokens.AnonymousToken(_create_criterion(value, value), negated)


def _parse_named(key: str, value: str, negated: bool) -> tokens.NamedToken:
    """Build a ``NamedToken`` for a ``key:value`` search term.

    A key ending in ``-min`` or ``-max`` loses that suffix, and the value is
    turned into a range by appending (``-min``) or prepending (``-max``)
    ``..`` before the criterion is created.  The criterion still receives
    the untouched value as its original value.

    :param key: the token name, possibly carrying a ``-min``/``-max`` suffix.
    :param value: the text that followed the key.
    :param negated: negation flag forwarded to the token.
    :return: the token for the (possibly shortened) key.
    """
    stem, suffix = key[:-4], key[-4:]
    if suffix == '-min':
        key, ranged_value = stem, value + '..'
    elif suffix == '-max':
        key, ranged_value = stem, '..' + value
    else:
        ranged_value = value
    return tokens.NamedToken(
        key, _create_criterion(value, ranged_value), negated)


def _parse_special(value: str, negated: bool) -> tokens.SpecialToken:
    """Wrap the text of a ``special:`` term in a ``SpecialToken``.

    :param value: the text that followed the ``special`` key, passed on
        unchanged.
    :param negated: negation flag forwarded to the token.
    :return: the newly created token.
    """
    special_token = tokens.SpecialToken(value, negated)
    return special_token


def _parse_sort(value: str, negated: bool) -> tokens.SortToken:
    """Build a ``SortToken`` from the text of a ``sort:`` term.

    The text is either ``name`` or ``name,direction`` where the direction is
    ``asc``, ``desc`` or empty.  A missing or empty direction selects the
    default order.  When ``negated`` is set the resulting order is flipped.

    :param value: the text that followed the ``sort`` key.
    :param negated: whether the term was negated.
    :return: the token carrying the sort name and the resolved order.
    :raises errors.SearchError: if the text contains more than one comma or
        names an unknown direction.
    """
    sort_token = tokens.SortToken

    if value.count(',') > 1:
        raise errors.SearchError('Too many commas in sort style token.')
    # At most one comma remains, so partitioning yields the name and the
    # optional direction; no separator means no direction was given.
    value, comma, direction = value.partition(',')
    order_str = direction if comma else None

    directions = {
        'asc': sort_token.SORT_ASC,
        'desc': sort_token.SORT_DESC,
        '': sort_token.SORT_DEFAULT,
        None: sort_token.SORT_DEFAULT,
    }
    try:
        order = directions[order_str]
    except KeyError:
        raise errors.SearchError(
            'Unknown search direction: %r.' % order_str)

    if not negated:
        return sort_token(value, order)

    # Negation swaps each order with its opposite.
    opposites = {
        sort_token.SORT_ASC: sort_token.SORT_DESC,
        sort_token.SORT_DESC: sort_token.SORT_ASC,
        sort_token.SORT_DEFAULT: sort_token.SORT_NEGATED_DEFAULT,
        sort_token.SORT_NEGATED_DEFAULT: sort_token.SORT_DEFAULT,
    }
    return sort_token(value, opposites[order])


class Parser:
    """Turns raw search text into a ``SearchQuery`` of categorised tokens."""

    def parse(self, query_text: str) -> SearchQuery:
        """Tokenise ``query_text`` and sort the tokens into a query object.

        The text is lowercased and split on runs of whitespace.  A single
        leading ``-`` marks a chunk as negated.  A chunk containing a colon
        not preceded by a backslash is split at the first such colon into a
        key and a value; the keys ``sort`` and ``special`` are routed to
        their own token lists, any other key yields a named token.  Chunks
        without such a colon become anonymous tokens.

        :param query_text: the raw search text; a falsy value is treated as
            an empty string.
        :return: the populated query.
        :raises errors.SearchError: if a chunk consists solely of ``-``;
            errors raised while building individual tokens propagate too.
        """
        query = SearchQuery()
        lowered = (query_text or '').lower()
        for chunk in re.split(r'\s+', lowered):
            if not chunk:
                continue

            negated = chunk[0] == '-'
            if negated:
                # Only one negation sign is stripped.
                chunk = chunk[1:]
                if not chunk:
                    raise errors.SearchError('Empty negated token.')

            match = re.match(r'^(.*?)(?<!\\):(.*)$', chunk)
            if not match:
                query.anonymous_tokens.append(
                    _parse_anonymous(chunk, negated))
                continue

            # Only the key is unescaped here; the value is passed on as-is.
            key = util.unescape(match.group(1))
            value = match.group(2)
            if key == 'sort':
                query.sort_tokens.append(_parse_sort(value, negated))
            elif key == 'special':
                query.special_tokens.append(_parse_special(value, negated))
            else:
                query.named_tokens.append(_parse_named(key, value, negated))
        return query
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`import re`
server/search: add search term escaping 2017-04-24 19:51:49 +00:00			`from typing import Match, List`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`from szurubooru import errors`
			`from szurubooru.search import criteria, tokens`
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`from szurubooru.search.query import SearchQuery`
server/search: add search term escaping 2017-04-24 19:51:49 +00:00			`from szurubooru.search.configs import util`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`def _create_criterion(`
			`original_value: str, value: str) -> criteria.BaseCriterion:`
server/search: add search term escaping 2017-04-24 19:51:49 +00:00			`if re.search(r'(?<!\\),', value):`
			`values = re.split(r'(?<!\\),', value)`
			`if any(not term.strip() for term in values):`
			`raise errors.SearchError('Empty compound value')`
			`return criteria.ArrayCriterion(original_value, values)`
			`if re.search(r'(?<!\\)\.(?<!\\)\.', value):`
			`low, high = re.split(r'(?<!\\)\.(?<!\\)\.', value, 1)`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`if not low and not high:`
			`raise errors.SearchError('Empty ranged value')`
			`return criteria.RangedCriterion(original_value, low, high)`
			`return criteria.PlainCriterion(original_value, value)`

server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`def _parse_anonymous(value: str, negated: bool) -> tokens.AnonymousToken:`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`criterion = _create_criterion(value, value)`
			`return tokens.AnonymousToken(criterion, negated)`

server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`def _parse_named(key: str, value: str, negated: bool) -> tokens.NamedToken:`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`original_value = value`
			`if key.endswith('-min'):`
			`key = key[:-4]`
			`value += '..'`
			`elif key.endswith('-max'):`
			`key = key[:-4]`
			`value = '..' + value`
			`criterion = _create_criterion(original_value, value)`
			`return tokens.NamedToken(key, criterion, negated)`

server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`def _parse_special(value: str, negated: bool) -> tokens.SpecialToken:`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`return tokens.SpecialToken(value, negated)`

server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`def _parse_sort(value: str, negated: bool) -> tokens.SortToken:`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`if value.count(',') == 0:`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`order_str = None`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`elif value.count(',') == 1:`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`value, order_str = value.split(',')`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`else:`
			`raise errors.SearchError('Too many commas in sort style token.')`
			`try:`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`order = {`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`'asc': tokens.SortToken.SORT_ASC,`
			`'desc': tokens.SortToken.SORT_DESC,`
			`'': tokens.SortToken.SORT_DEFAULT,`
			`None: tokens.SortToken.SORT_DEFAULT,`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`}[order_str]`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`except KeyError:`
			`raise errors.SearchError(`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`'Unknown search direction: %r.' % order_str)`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`if negated:`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`order = {`
			`tokens.SortToken.SORT_ASC:`
			`tokens.SortToken.SORT_DESC,`
			`tokens.SortToken.SORT_DESC:`
			`tokens.SortToken.SORT_ASC,`
			`tokens.SortToken.SORT_DEFAULT:`
			`tokens.SortToken.SORT_NEGATED_DEFAULT,`
			`tokens.SortToken.SORT_NEGATED_DEFAULT:`
			`tokens.SortToken.SORT_DEFAULT,`
			`}[order]`
			`return tokens.SortToken(value, order)`

server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00
server: drop old style class declarations 2016-10-22 12:43:52 +00:00			`class Parser:`
server: refactor + add type hinting - Added type hinting (for now, 3.5-compatible) - Split `db` namespace into `db` module and `model` namespace - Changed elastic search to be created lazily for each operation - Changed to class based approach in entity serialization to allow stronger typing - Removed `required` argument from `context.get_*` family of functions; now it's implied if `default` argument is omitted - Changed `unalias_dict` implementation to use less magic inputs 2017-02-04 00:08:12 +00:00			`def parse(self, query_text: str) -> SearchQuery:`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`query = SearchQuery()`
			`for chunk in re.split(r'\s+', (query_text or '').lower()):`
			`if not chunk:`
			`continue`
			`negated = False`
server/search: fix searching for --- Allow only one negation sign. Also throw an error if user searches only for "-". 2017-04-24 17:55:02 +00:00			`if chunk[0] == '-':`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`chunk = chunk[1:]`
server/search: fix searching for --- Allow only one negation sign. Also throw an error if user searches only for "-". 2017-04-24 17:55:02 +00:00			`negated = True`
			`if not chunk:`
			`raise errors.SearchError('Empty negated token.')`
server/search: add search term escaping 2017-04-24 19:51:49 +00:00			`match = re.match(r'^(.*?)(?<!\\):(.*)$', chunk)`
server/search: match only [a-z-]* for named tokens Adds ability to search for : for example. Still not perfect, but it's a start. 2016-09-26 20:06:18 +00:00			`if match:`
			`key, value = list(match.groups())`
server/search: add search term escaping 2017-04-24 19:51:49 +00:00			`key = util.unescape(key)`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`if key == 'sort':`
			`query.sort_tokens.append(`
			`_parse_sort(value, negated))`
			`elif key == 'special':`
			`query.special_tokens.append(`
			`_parse_special(value, negated))`
			`else:`
			`query.named_tokens.append(`
			`_parse_named(key, value, negated))`
			`else:`
server/general: embrace most of PEP8 Ignored only the rules about continuing / hanging indentation. Also, added __init__.py to tests so that pylint discovers them. (I don't buy pytest's BS about installing your package.) 2016-08-14 12:22:53 +00:00			`query.anonymous_tokens.append(`
			`_parse_anonymous(chunk, negated))`
server/search: refactor, extract parsing 2016-06-03 10:13:43 +00:00			`return query`