server/posts: change reverse image search API
Add exact duplicates search; refactor to use classes over dictionaries
This commit is contained in:
		
							parent
							
								
									04b820c730
								
							
						
					
					
						commit
						4cb613a5c9
					
				
							
								
								
									
										28
									
								
								API.md
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								API.md
									
									
									
									
									
								
							| @ -1070,17 +1070,15 @@ data. | ||||
| 
 | ||||
| - **Output** | ||||
| 
 | ||||
|     A list of [image search results](#image-search-result). | ||||
|     An [image search result](#image-search-result). | ||||
| 
 | ||||
| - **Errors** | ||||
| 
 | ||||
|     - input file is not an image | ||||
|     - privileges are too low | ||||
| 
 | ||||
| - **Description** | ||||
| 
 | ||||
|     Retrieves posts that look like the input image. Works only on images and | ||||
|     animations, i.e. does not work for videos and Flash movies. | ||||
|     Retrieves posts that look like the input image. | ||||
| 
 | ||||
| ## Listing comments | ||||
| - **Request** | ||||
| @ -2152,14 +2150,15 @@ A result of reverse image search operation. | ||||
| 
 | ||||
| ```json5 | ||||
| { | ||||
|     "results": [ | ||||
|     "exactPost": <exact-post>, | ||||
|     "similarPosts": [ | ||||
|         { | ||||
|             "dist": <distance>, | ||||
|             "post": <post> | ||||
|             "distance": <distance>, | ||||
|             "post": <similar-post> | ||||
|         }, | ||||
|         { | ||||
|             "dist": <distance>, | ||||
|             "post": <post> | ||||
|             "distance": <distance>, | ||||
|             "post": <similar-post> | ||||
|         }, | ||||
|         ... | ||||
|     ] | ||||
| @ -2167,9 +2166,14 @@ A result of reverse image search operation. | ||||
| ``` | ||||
| 
 | ||||
| **Field meaning** | ||||
| - `<dist>`: distance from the original image (0..1). The lower this value is, the more similar the | ||||
| post is. | ||||
| - `<post>`: a [post resource](#post). | ||||
| - `<exact-post>`: a [post resource](#post) that is an exact byte-for-byte duplicate | ||||
|    of the input file. May be `null`. | ||||
| - `<similar-post>`: a [post resource](#post) that isn't an exact duplicate, but | ||||
|    visually resembles the input file. Similarity search works only on images and | ||||
|    animations, i.e. it does not work for videos and Flash movies. For non-images | ||||
|    and corrupted images, the `similarPosts` list is empty. | ||||
| - `<distance>`: distance from the original image (0..1). The lower this value | ||||
|    is, the more similar the post is. | ||||
| 
 | ||||
| # Search | ||||
| 
 | ||||
|  | ||||
| @ -15,6 +15,7 @@ reports=no | ||||
| disable= | ||||
|     # we're not java | ||||
|     missing-docstring, | ||||
|     broad-except, | ||||
| 
 | ||||
|     # covered better by pycodestyle | ||||
|     bad-continuation, | ||||
|  | ||||
| @ -212,11 +212,14 @@ def get_posts_by_image(ctx, _params=None): | ||||
|     auth.verify_privilege(ctx.user, 'posts:reverse_search') | ||||
|     content = ctx.get_file('content', required=True) | ||||
|     return { | ||||
|         'results': [ | ||||
|             { | ||||
|                 'dist': item['dist'], | ||||
|                 'post': _serialize_post(ctx, item['post']), | ||||
|             } | ||||
|             for item in posts.search_by_image(content) | ||||
|         ], | ||||
|         'exactPost': | ||||
|             _serialize_post(ctx, posts.search_by_image_exact(content)), | ||||
|         'similarPosts': | ||||
|             [ | ||||
|                 { | ||||
|                     'distance': lookalike.distance, | ||||
|                     'post': _serialize_post(ctx, lookalike.post), | ||||
|                 } | ||||
|                 for lookalike in posts.search_by_image(content) | ||||
|             ], | ||||
|     } | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| import elasticsearch | ||||
| import elasticsearch_dsl | ||||
| from image_match.elasticsearch_driver import SignatureES | ||||
| from szurubooru import config, errors | ||||
| from szurubooru import config | ||||
| 
 | ||||
| 
 | ||||
| # pylint: disable=invalid-name | ||||
| @ -12,6 +12,13 @@ es = elasticsearch.Elasticsearch([{ | ||||
| session = SignatureES(es, index='szurubooru') | ||||
| 
 | ||||
| 
 | ||||
| class Lookalike: | ||||
|     def __init__(self, score, distance, path): | ||||
|         self.score = score | ||||
|         self.distance = distance | ||||
|         self.path = path | ||||
| 
 | ||||
| 
 | ||||
| def add_image(path, image_content): | ||||
|     if not path or not image_content: | ||||
|         return | ||||
| @ -35,15 +42,14 @@ def search_by_image(image_content): | ||||
|         for result in session.search_image( | ||||
|                 path=image_content,  # sic | ||||
|                 bytestream=True): | ||||
|             yield { | ||||
|                 'score': result['score'], | ||||
|                 'dist': result['dist'], | ||||
|                 'path': result['path'], | ||||
|             } | ||||
|             yield Lookalike( | ||||
|                 score=result['score'], | ||||
|                 distance=result['dist'], | ||||
|                 path=result['path']) | ||||
|     except elasticsearch.exceptions.ElasticsearchException: | ||||
|         raise | ||||
|     except Exception: | ||||
|         raise errors.SearchError('Error searching (invalid input?)') | ||||
|         yield from [] | ||||
| 
 | ||||
| 
 | ||||
| def purge(): | ||||
|  | ||||
| @ -57,6 +57,12 @@ class InvalidPostFlagError(errors.ValidationError): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class PostLookalike(image_hash.Lookalike): | ||||
|     def __init__(self, score, distance, post): | ||||
|         super().__init__(score, distance, post.post_id) | ||||
|         self.post = post | ||||
| 
 | ||||
| 
 | ||||
| SAFETY_MAP = { | ||||
|     db.Post.SAFETY_SAFE: 'safe', | ||||
|     db.Post.SAFETY_SKETCHY: 'sketchy', | ||||
| @ -534,13 +540,20 @@ def merge_posts(source_post, target_post, replace_content): | ||||
|         update_post_content(target_post, content) | ||||
| 
 | ||||
| 
 | ||||
| def search_by_image_exact(image_content): | ||||
|     checksum = util.get_sha1(image_content) | ||||
|     return db.session \ | ||||
|         .query(db.Post) \ | ||||
|         .filter(db.Post.checksum == checksum) \ | ||||
|         .one_or_none() | ||||
| 
 | ||||
| 
 | ||||
| def search_by_image(image_content): | ||||
|     for result in image_hash.search_by_image(image_content): | ||||
|         yield { | ||||
|             'score': result['score'], | ||||
|             'dist': result['dist'], | ||||
|             'post': get_post_by_id(result['path']) | ||||
|         } | ||||
|         yield PostLookalike( | ||||
|             score=result.score, | ||||
|             distance=result.distance, | ||||
|             post=get_post_by_id(result.path)) | ||||
| 
 | ||||
| 
 | ||||
| def populate_reverse_search(): | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 rr-
						rr-