duckduckgo_definitions.py 5.83 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
"""
DuckDuckGo (definitions)

- `Instant Answer API`_
- `DuckDuckGo query`_

.. _Instant Answer API: https://duckduckgo.com/api
.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1

"""

asciimoo's avatar
asciimoo committed
12
import json
Dalf's avatar
Dalf committed
13
from lxml import html
Adam Tauber's avatar
Adam Tauber committed
14
from re import compile
Dalf's avatar
Dalf committed
15
from searx.engines.xpath import extract_text
16
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
Adam Tauber's avatar
Adam Tauber committed
17
from searx.url_utils import urlencode
18
from searx.utils import html_to_text, match_language
asciimoo's avatar
asciimoo committed
19

20 21 22
# URL template of the API call; request() substitutes the url-encoded
# ``q=...`` pair for the ``{query}`` placeholder.
url = (
    'https://api.duckduckgo.com/'
    '?{query}&format=json&pretty=0&no_redirect=1&d=1'
)

23 24
# Matches a leading plain-HTTP scheme; response() uses it to rewrite the
# infobox id so that it starts with "https:".
http_regex = compile(r'^http:')

asciimoo's avatar
asciimoo committed
25

Dalf's avatar
Dalf committed
26 27 28 29
def result_to_text(url, text, htmlResult):
    """Return the display text of a DuckDuckGo topic entry.

    The text of the first ``<a>`` element found in ``htmlResult`` is
    preferred. ``text`` is returned when there is no HTML fragment to parse
    or when the fragment does not contain any link.

    :param url: URL of the entry; accepted for interface compatibility,
        not used by the function.
    :param text: plain-text fallback.
    :param htmlResult: HTML fragment of the entry; may be ``None`` or empty.
    :return: the text of the first link, or ``text``.
    """
    # TODO : remove result ending with "Meaning" or "Category"

    # callers pass ``None`` when the entry has no 'Result' key, and lxml
    # raises on ``None`` / empty / whitespace-only documents: fall back to
    # the plain text instead of failing the whole response
    if not htmlResult or not htmlResult.strip():
        return text

    dom = html.fromstring(htmlResult)
    a = dom.xpath('//a')
    if a:
        return extract_text(a[0])
    return text

35

asciimoo's avatar
asciimoo committed
36
def request(query, params):
    """Prepare the outgoing API request for ``query``.

    Stores the formatted API URL in ``params['url']`` and the primary
    language subtag in ``params['headers']['Accept-Language']``.

    :param query: search terms, url-encoded as the ``q`` argument.
    :param params: request parameters; ``params['language']`` is read,
        ``params['url']`` and ``params['headers']`` are written.
    :return: the same ``params`` object, updated in place.
    """
    encoded_query = urlencode({'q': query})
    params['url'] = url.format(query=encoded_query)

    # NOTE(review): ``supported_languages`` is not defined in the visible
    # part of this module - presumably provided at load time; confirm.
    matched = match_language(params['language'], supported_languages, language_aliases)

    # only the part before the first '-' is sent (e.g. 'en' for 'en-US')
    params['headers']['Accept-Language'] = matched.split('-')[0]
    return params


def _infobox_attributes(search_res):
    """Return the label/value pairs listed under ``Infobox.content``."""
    infobox = search_res.get('Infobox')
    # anything but a dict (null, empty string, ...) means "no infobox data"
    if not isinstance(infobox, dict):
        return []
    return [{'label': info.get('label'), 'value': info.get('value')}
            for info in infobox.get('content') or []]


def _topic_text(topic):
    """Return the display text of one ``RelatedTopics`` / ``Topics`` entry."""
    return result_to_text(topic.get('FirstURL', None),
                          topic.get('Text', None),
                          topic.get('Result', None))


def response(resp):
    """Convert the JSON reply of the API into a list of result dicts.

    Depending on the reply, the returned list contains, in this order:

    - ``{'answer': ...}`` when there is an answer whose type is not ``calc``
    - ``{'title': ..., 'url': ...}`` for every entry of ``Results`` that has
      a ``FirstURL``
    - ``{'suggestion': ...}`` for every directly linked related topic whose
      text differs from the heading
    - when the heading is not empty, either a plain ``url`` / ``title`` /
      ``content`` result (exactly one URL and nothing else is known) or an
      ``infobox`` result gathering content, image, attributes, URLs and
      grouped related topics.

    Keys that are missing from the reply or set to JSON ``null`` are handled
    like empty values.

    :param resp: response object; only ``resp.text`` (JSON document) is read.
    :return: list of result dicts.
    """
    results = []

    search_res = json.loads(resp.text)

    heading = search_res.get('Heading') or ''

    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer and search_res.get('AnswerType', '') != 'calc':
        results.append({'answer': html_to_text(answer)})

    # add infobox: the content is the definition followed by the abstract
    content = (search_res.get('Definition') or '') + (search_res.get('Abstract') or '')

    # image
    image = search_res.get('Image') or None

    # attributes
    attributes = _infobox_attributes(search_res)

    # urls
    urls = []
    for ddg_result in search_res.get('Results') or []:
        if 'FirstURL' in ddg_result:
            first_url = ddg_result.get('FirstURL', '')
            urls.append({'title': ddg_result.get('Text', ''), 'url': first_url})
            results.append({'title': heading, 'url': first_url})

    # related topics: entries with a URL become suggestions, entries with
    # nested 'Topics' become named groups of the infobox
    related_topics = []
    for ddg_result in search_res.get('RelatedTopics') or []:
        if 'FirstURL' in ddg_result:
            suggestion = _topic_text(ddg_result)
            if suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            candidates = (_topic_text(topic)
                          for topic in ddg_result.get('Topics') or [])
            related_topics.append({
                'name': ddg_result.get('Name', ''),
                'suggestions': [s for s in candidates if s != heading],
            })

    # abstract, then definition: each one adds a URL, the last one found is
    # used as infobox id
    # add as result ? as answer ? problem always in english
    infobox_id = None
    for url_key, source_key in (('AbstractURL', 'AbstractSource'),
                                ('DefinitionURL', 'DefinitionSource')):
        link = search_res.get(url_key, '')
        if link:
            infobox_id = link
            urls.append({'title': search_res.get(source_key), 'url': link})

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = http_regex.sub('https:', infobox_id)

    # entity
    entity = search_res.get('Entity', None)
    # TODO continent / country / department / location / waterfall /
    #      mountain range :
    #      link to map search, get weather, near by locations
    # TODO musician : link to music search
    # TODO concert tour : ??
    # TODO film / actor / television  / media franchise :
    #      links to IMDB / rottentomatoes (or scrape result)
    # TODO music : link to musicbrainz / last.fm
    # TODO book : ??
    # TODO artist / playwright : ??
    # TODO company : ??
    # TODO software / os : ??
    # TODO software engineer : ??
    # TODO prepared food : ??
    # TODO website : ??
    # TODO performing art : ??
    # TODO programming language : ??
    # TODO file format : ??

    if heading:
        # TODO get infobox.meta.value where .label='article_title'
        only_one_link = (image is None
                         and not attributes
                         and len(urls) == 1
                         and not related_topics
                         and not content)
        if only_one_link:
            results.append({
                'url': urls[0]['url'],
                'title': heading,
                'content': content,
            })
        else:
            results.append({
                'infobox': heading,
                'id': infobox_id,
                'entity': entity,
                'content': content,
                'img_src': image,
                'attributes': attributes,
                'urls': urls,
                'relatedTopics': related_topics,
            })

    return results