test_utils.py 5.9 KB
Newer Older
1
# -*- coding: utf-8 -*-
Gabor Nagy's avatar
Gabor Nagy committed
2
import mock
Adam Tauber's avatar
Adam Tauber committed
3
import sys
Gabor Nagy's avatar
Gabor Nagy committed
4 5 6
from searx.testing import SearxTestCase
from searx import utils

Adam Tauber's avatar
Adam Tauber committed
7 8 9
if sys.version_info[0] == 3:
    unicode = str

Gabor Nagy's avatar
Gabor Nagy committed
10 11 12 13 14 15 16 17

class TestUtils(SearxTestCase):

    def test_gen_useragent(self):
        self.assertIsInstance(utils.gen_useragent(), str)
        self.assertIsNotNone(utils.gen_useragent())
        self.assertTrue(utils.gen_useragent().startswith('Mozilla'))

18 19 20 21 22
    def test_searx_useragent(self):
        self.assertIsInstance(utils.searx_useragent(), str)
        self.assertIsNotNone(utils.searx_useragent())
        self.assertTrue(utils.searx_useragent().startswith('searx'))

Gabor Nagy's avatar
Gabor Nagy committed
23 24 25 26 27 28 29 30 31 32 33 34 35 36
    def test_highlight_content(self):
        self.assertEqual(utils.highlight_content(0, None), None)
        self.assertEqual(utils.highlight_content(None, None), None)
        self.assertEqual(utils.highlight_content('', None), None)
        self.assertEqual(utils.highlight_content(False, None), None)

        contents = [
            '<html></html>'
            'not<'
        ]
        for content in contents:
            self.assertEqual(utils.highlight_content(content, None), content)

        content = 'a'
Adam Tauber's avatar
Adam Tauber committed
37
        query = b'test'
Gabor Nagy's avatar
Gabor Nagy committed
38
        self.assertEqual(utils.highlight_content(content, query), content)
Adam Tauber's avatar
Adam Tauber committed
39
        query = b'a test'
Gabor Nagy's avatar
Gabor Nagy committed
40 41
        self.assertEqual(utils.highlight_content(content, query), content)

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
    def test_html_to_text(self):
        html = """
        <a href="/testlink" class="link_access_account">
            <span class="toto">
                <span>
                    <img src="test.jpg" />
                </span>
            </span>
            <span class="titi">
                            Test text
            </span>
        </a>
        """
        self.assertIsInstance(utils.html_to_text(html), unicode)
        self.assertIsNotNone(utils.html_to_text(html))
        self.assertEqual(utils.html_to_text(html), "Test text")

59 60 61 62 63 64 65 66 67
    def test_prettify_url(self):
        data = (('https://searx.me/', 'https://searx.me/'),
                (u'https://searx.me/ű', u'https://searx.me/ű'),
                ('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'),
                (u'https://searx.me/' + (100 * u'ű'), u'https://searx.me/[...]űűűűűűűűűűűűűűűűű'))

        for test_url, expected in data:
            self.assertEqual(utils.prettify_url(test_url, max_length=32), expected)

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
    def test_match_language(self):
        self.assertEqual(utils.match_language('es', ['es']), 'es')
        self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')

        aliases = {'en-GB': 'en-UK', 'he': 'iw'}

        # guess country
        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
        self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
        self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
        self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')

        # language aliases
        self.assertEqual(utils.match_language('iw', ['he']), 'he')
        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')

93 94 95 96 97 98 99
    def test_ecma_unscape(self):
        self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
        self.assertEqual(utils.ecma_unescape('text using %xx: %F3'),
                         u'text using %xx: ó')
        self.assertEqual(utils.ecma_unescape('text using %u: %u5409, %u4E16%u754c'),
                         u'text using %u: 吉, 世界')

Gabor Nagy's avatar
Gabor Nagy committed
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137

class TestHTMLTextExtractor(SearxTestCase):

    def setUp(self):
        self.html_text_extractor = utils.HTMLTextExtractor()

    def test__init__(self):
        self.assertEqual(self.html_text_extractor.result, [])

    def test_handle_charref(self):
        self.html_text_extractor.handle_charref('xF')
        self.assertIn(u'\x0f', self.html_text_extractor.result)
        self.html_text_extractor.handle_charref('XF')
        self.assertIn(u'\x0f', self.html_text_extractor.result)

        self.html_text_extractor.handle_charref('97')
        self.assertIn(u'a', self.html_text_extractor.result)

    def test_handle_entityref(self):
        entity = 'test'
        self.html_text_extractor.handle_entityref(entity)
        self.assertIn(entity, self.html_text_extractor.result)


class TestUnicodeWriter(SearxTestCase):

    def setUp(self):
        self.unicode_writer = utils.UnicodeWriter(mock.MagicMock())

    def test_write_row(self):
        row = [1, 2, 3]
        self.assertEqual(self.unicode_writer.writerow(row), None)

    def test_write_rows(self):
        self.unicode_writer.writerow = mock.MagicMock()
        rows = [1, 2, 3]
        self.unicode_writer.writerows(rows)
        self.assertEqual(self.unicode_writer.writerow.call_count, len(rows))
138 139 140 141 142 143 144 145 146 147 148 149 150 151


class TestNewHmac(SearxTestCase):

    def test_bytes(self):
        for secret_key in ['secret', b'secret', 1]:
            if secret_key == 1:
                with self.assertRaises(TypeError):
                    utils.new_hmac(secret_key, b'http://example.com')
                continue
            res = utils.new_hmac(secret_key, b'http://example.com')
            self.assertEqual(
                res,
                '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819')