/
/
opt
/
gsutil
/
third_party
/
charset_normalizer
/
tests
Server: in-mum-web1112.main-hosting.eu (62.72.28.111)
You: 216.73.216.63
PHP 8.3.16
Dir:
/opt/gsutil/third_party/charset_normalizer/tests
Edit:
/opt/gsutil/third_party/charset_normalizer/tests/test_coherence_detection.py
import pytest from charset_normalizer.cd import encoding_languages, mb_encoding_languages, is_multi_byte_encoding, get_target_features, filter_alt_coherence_matches @pytest.mark.parametrize( "iana_encoding, expected_languages", [ ("cp864", ["Arabic", "Farsi"]), ("cp862", ["Hebrew"]), ("cp737", ["Greek"]), ("cp424", ["Hebrew"]), ("cp273", ["Latin Based"]), ("johab", ["Korean"]), ("shift_jis", ["Japanese"]), ("mac_greek", ["Greek"]), ("iso2022_jp", ["Japanese"]) ] ) def test_infer_language_from_cp(iana_encoding, expected_languages): languages = mb_encoding_languages(iana_encoding) if is_multi_byte_encoding(iana_encoding) else encoding_languages(iana_encoding) for expected_language in expected_languages: assert expected_language in languages, "Wrongly detected language for given code page" @pytest.mark.parametrize( "language, expected_have_accents, expected_pure_latin", [ ("English", False, True), ("French", True, True), ("Hebrew", False, False), ("Arabic", False, False), ("Vietnamese", True, True), ("Turkish", True, True) ] ) def test_target_features(language, expected_have_accents, expected_pure_latin): target_have_accents, target_pure_latin = get_target_features(language) assert target_have_accents is expected_have_accents assert target_pure_latin is expected_pure_latin @pytest.mark.parametrize( "matches, expected_return", [ ([("English", 0.88,), ("English—", 0.99)], [("English", 0.99)]), ([("English", 0.88,), ("English—", 0.99), ("English——", 0.999)], [("English", 0.999)]), ([("English", 0.88,), ("English—", 0.77)], [("English", 0.88)]), ([("English", 0.88,), ("Italian", 0.77)], [("English", 0.88), ("Italian", 0.77)]), ] ) def test_filter_alt_coherence_matches(matches, expected_return): results = filter_alt_coherence_matches(matches) assert results == expected_return
Ukuran: 2.0 KB