diff options
| author | Greg Roach <fisharebest@gmail.com> | 2017-01-05 11:04:28 +0000 |
|---|---|---|
| committer | Greg Roach <fisharebest@gmail.com> | 2017-01-05 11:18:04 +0000 |
| commit | 991b93ddf82ec1af763df7d5e82b5c7c43ed1347 (patch) | |
| tree | 439b9ea6635f41c40c277e101c426171072d8b4a | |
| parent | 2032bcdd12e743ce4811757ac21942de54105026 (diff) | |
| download | webtrees-991b93ddf82ec1af763df7d5e82b5c7c43ed1347.tar.gz webtrees-991b93ddf82ec1af763df7d5e82b5c7c43ed1347.tar.bz2 webtrees-991b93ddf82ec1af763df7d5e82b5c7c43ed1347.zip | |
#1175 - use the php-intl collator class, where it is installed
| -rw-r--r-- | app/I18N.php | 186 |
1 files changed, 64 insertions, 122 deletions
diff --git a/app/I18N.php b/app/I18N.php index 0dc59ebea6..51e8d7d120 100644 --- a/app/I18N.php +++ b/app/I18N.php @@ -15,6 +15,7 @@ */ namespace Fisharebest\Webtrees; +use Collator; use Exception; use Fisharebest\ExtCalendar\ArabicCalendar; use Fisharebest\ExtCalendar\CalendarInterface; @@ -37,25 +38,21 @@ class I18N { /** @var Translator An object that performs translation*/ private static $translator; + /** @var Collator From the php-intl library */ + private static $collator; + // Digits are always rendered LTR, even in RTL text. const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹'; - // Reversable character conversions from the UNICODE 5.1 database. - // It excludes ambiguous (turkish dotless i) and mixed-case (Dz) characters. - // The characters should be arranged in default unicode-collation order. - const ALPHABET_LOWER = 'aàáâãäåāăąǎǟǡǻȁȃȧḁạảấầẩẫậắằẳẵặⓐaæǣǽbḃḅḇⓑbƀɓƃcçćĉċčḉⅽⓒcƈdďḋḍḏḑḓⅾⓓddždzđɖɗƌðeèéêëēĕėęěȅȇȩḕḗḙḛḝẹẻẽếềểễệⓔeǝəɛfḟⓕfƒgĝğġģǧǵḡⓖgǥɠɣƣhĥȟḣḥḧḩḫⓗhƕħiìíîïĩīĭįǐȉȋḭḯỉịⅰⓘiⅱⅲijⅳⅸɨɩjĵⓙjkķǩḱḳḵⓚkƙlĺļľḷḹḻḽⅼⓛlŀljłƚmḿṁṃⅿⓜmnñńņňǹṅṇṉṋⓝnnjɲƞŋoòóôõöōŏőơǒǫǭȍȏȫȭȯȱṍṏṑṓọỏốồổỗộớờởỡợⓞoœøǿɔɵȣpṕṗⓟpƥqⓠqrŕŗřȑȓṙṛṝṟⓡrʀsśŝşšșṡṣṥṧṩⓢsʃtţťțṫṭṯṱⓣtŧƭʈuùúûüũūŭůűųưǔǖǘǚǜȕȗṳṵṷṹṻụủứừửữựⓤuʉɯʊvṽṿⅴⓥvⅵⅶⅷʋʌwŵẁẃẅẇẉⓦwxẋẍⅹⓧxⅺⅻyýÿŷȳẏỳỵỷỹⓨyƴzźżžẑẓẕⓩzƶȥǯʒƹȝþƿƨƽƅάαἀἁἂἃἄἅἆἇὰάᾀᾁᾂᾃᾄᾅᾆᾇᾰᾱᾳβγδέεἐἑἒἓἔἕὲέϝϛζήηἠἡἢἣἤἥἦἧὴήᾐᾑᾒᾓᾔᾕᾖᾗῃθϊἰἱἲἳἴἵἶἷὶίῐῑκϗλμνξοόὀὁὂὃὄὅὸόπϟϙρῥσϲτυϋύὑὓὕὗὺύῠῡφχψωώὠὡὢὣὤὥὦὧὼώᾠᾡᾢᾣᾤᾥᾦᾧῳϡϸϻϣϥϧϩϫϭϯаӑӓәӛӕбвгґғҕдԁђԃѓҙеѐёӗєжӂӝҗзԅӟѕӡԇиѝӣҋӥіїйјкқӄҡҟҝлӆљԉмӎнӊңӈҥњԋоӧөӫпҧҁрҏсԍҫтԏҭћќуӯўӱӳүұѹфхҳһѡѿѽѻцҵчӵҷӌҹҽҿџшщъыӹьҍѣэӭюяѥѧѫѩѭѯѱѳѵѷҩաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆȼɂɇɉɋɍɏͱͳͷͻͼͽӏӷӻӽӿԑԓԕԗԙԛԝԟԡԣԥᵹᵽỻỽỿⅎↄⰰⰱⰲⰳⰴⰵⰶⰷⰸⰹⰺⰻⰼⰽⰾⰿⱀⱁⱂⱃⱄⱅⱆⱇⱈⱉⱊⱋⱌⱍⱎⱏⱐⱑⱒⱓⱔⱕⱖⱗⱘⱙⱚⱛⱜⱝⱞⱡⱨⱪⱬⱳⱶⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣⳬⳮⴀⴁⴂⴃⴄⴅⴆⴇⴈⴉⴊⴋⴌⴍⴎⴏⴐⴑⴒⴓⴔⴕⴖⴗⴘⴙⴚⴛⴜⴝⴞⴟⴠⴡⴢⴣⴤⴥꙁꙃꙅꙇꙉꙋꙍꙏꙑꙓꙕꙗꙙꙛꙝꙟꙣꙥꙧꙩꙫꙭꚁꚃꚅꚇꚉꚋꚍꚏꚑꚓꚕꚗꜣꜥꜧꜩꜫꜭꜯꜳꜵꜷꜹꜻꜽꜿꝁꝃꝅꝇꝉꝋꝍꝏꝑꝓꝕꝗꝙꝛꝝꝟꝡꝣꝥꝧꝩꝫꝭꝯꝺꝼꝿꞁꞃꞅꞇꞌ'; - const ALPHABET_UPPER = 'AÀÁÂÃÄÅĀĂĄǍǞǠǺȀȂȦḀẠẢẤẦẨẪẬẮẰẲẴẶⒶAÆǢǼBḂḄḆⒷBɃƁƂCÇĆĈĊČḈⅭⒸCƇDĎḊḌḎḐḒⅮⒹDDŽDZĐƉƊƋÐEÈÉÊËĒĔĖĘĚȄȆȨḔḖḘḚḜẸẺẼẾỀỂỄỆⒺEƎƏƐFḞⒻFƑGĜĞĠĢǦǴḠⒼGǤƓƔƢHĤȞḢḤḦḨḪⒽHǶĦIÌÍÎÏĨĪĬĮǏȈȊḬḮỈỊⅠⒾIⅡⅢIJⅣⅨƗƖJĴⒿJKĶǨḰḲḴⓀKƘLĹĻĽḶḸḺḼⅬⓁLĿLJŁȽMḾṀṂⅯⓂMNÑŃŅŇǸṄṆṈṊⓃNNJƝȠŊOÒÓÔÕÖŌŎŐƠǑǪǬȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢⓄOŒØǾƆƟȢPṔṖⓅPƤQⓆQRŔŖŘȐȒṘṚṜṞⓇRƦSŚŜŞŠȘṠṢṤṦṨⓈSƩTŢŤȚṪṬṮṰⓉTŦƬƮUÙÚÛÜŨŪŬŮŰŲƯǓǕǗǙǛȔȖṲṴṶṸṺỤỦỨỪỬỮỰⓊUɄƜƱVṼṾⅤⓋVⅥⅦⅧƲɅWŴẀẂẄẆẈⓌWXẊẌⅩⓍXⅪⅫYÝŸŶȲẎỲỴỶỸⓎYƳZŹŻŽẐẒẔⓏZƵȤǮƷƸȜÞǷƧƼƄΆΑἈἉἊἋἌἍἎἏᾺΆᾈᾉᾊᾋᾌᾍᾎᾏᾸᾹᾼΒΓΔΈΕἘἙἚἛἜἝῈΈϜϚΖΉΗἨἩἪἫἬἭἮἯῊΉᾘᾙᾚᾛᾜᾝᾞᾟῌΘΪἸἹἺἻἼἽἾἿῚΊῘῙΚϏΛΜΝΞΟΌὈὉὊὋὌὍῸΌΠϞϘΡῬΣϹΤΥΫΎὙὛὝὟῪΎῨῩΦΧΨΩΏὨὩὪὫὬὭὮὯῺΏᾨᾩᾪᾫᾬᾭᾮᾯῼϠϷϺϢϤϦϨϪϬϮАӐӒӘӚӔБВГҐҒҔДԀЂԂЃҘЕЀЁӖЄЖӁӜҖЗԄӞЅӠԆИЍӢҊӤІЇЙЈКҚӃҠҞҜЛӅЉԈМӍНӉҢӇҤЊԊОӦӨӪПҦҀРҎСԌҪТԎҬЋЌУӮЎӰӲҮҰѸФХҲҺѠѾѼѺЦҴЧӴҶӋҸҼҾЏШЩЪЫӸЬҌѢЭӬЮЯѤѦѪѨѬѮѰѲѴѶҨԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖȻɁɆɈɊɌɎͰͲͶϽϾϿӀӶӺӼӾԐԒԔԖԘԚԜԞԠԢԤꝽⱣỺỼỾℲↃⰀⰁⰂⰃⰄⰅⰆⰇⰈⰉⰊⰋⰌⰍⰎⰏⰐⰑⰒⰓⰔⰕⰖⰗⰘⰙⰚⰛⰜⰝⰞⰟⰠⰡⰢⰣⰤⰥⰦⰧⰨⰩⰪⰫⰬⰭⰮⱠⱧⱩⱫⱲⱵⲀⲂⲄⲆⲈⲊⲌⲎⲐⲒⲔⲖⲘⲚⲜⲞⲠⲢⲤⲦⲨⲪⲬⲮⲰⲲⲴⲶⲸⲺⲼⲾⳀⳂⳄⳆⳈⳊⳌⳎⳐⳒⳔⳖⳘⳚⳜⳞⳠⳢⳫⳭႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅꙀꙂꙄꙆꙈꙊꙌꙎꙐꙒꙔꙖꙘꙚꙜꙞꙢꙤꙦꙨꙪꙬꚀꚂꚄꚆꚈꚊꚌꚎꚐꚒꚔꚖꜢꜤꜦꜨꜪꜬꜮꜲꜴꜶꜸꜺꜼꜾꝀꝂꝄꝆꝈꝊꝌꝎꝐꝒꝔꝖꝘꝚꝜꝞꝠꝢꝤꝦꝨꝪꝬꝮꝹꝻꝾꞀꞂꞄꞆꞋ'; - - /** @var string Alphabet, in lower case, for the current locale. */ - private static $alphabet_lower = 'abcdefghijklmnopqrstuvwxyz'; + // These locales need special handling for the dotless letter I. + const DOTLESS_I_LOCALES = ['az', 'tr']; + const DOTLESS_I_TOLOWER = ['I' => 'ı', 'İ' => 'i']; + const DOTLESS_I_TOUPPER = ['ı' => 'I', 'i' => 'İ']; - /** @var string Alphabet, in upper case, for the current locale. */ - private static $alphabet_upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - - /** @var int[][] Character ranges used by each script. */ - private static $scripts = [ - ['Latn', 0x0041, 0x005A], // a-z - ['Latn', 0x0061, 0x007A], // A-Z + // The ranges of characters used by each script. + const SCRIPT_CHARACTER_RANGES = [ + ['Latn', 0x0041, 0x005A], + ['Latn', 0x0061, 0x007A], ['Latn', 0x0100, 0x02AF], ['Grek', 0x0370, 0x03FF], ['Cyrl', 0x0400, 0x052F], @@ -75,26 +72,33 @@ class I18N { ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans ]; - /** @var string[] Characters that are displayed in mirror form in RTL text. */ - private static $mirror_characters = [ - '(' => ')', - ')' => '(', - '[' => ']', - ']' => '[', - '{' => '}', - '}' => '{', - '<' => '>', - '>' => '<', - '‹' => '›', - '›' => '‹', - '«' => '»', - '»' => '«', - '﴾' => '﴿', - '﴿' => '﴾', - '“' => '”', - '”' => '“', - '‘' => '’', - '’' => '‘', + // Characters that are displayed in mirror form in RTL text. + const MIRROR_CHARACTERS = [ + '(' => ')', + ')' => '(', + '[' => ']', + ']' => '[', + '{' => '}', + '}' => '{', + '<' => '>', + '>' => '<', + '‹' => '›', + '›' => '‹', + '«' => '»', + '»' => '«', + '﴾' => '﴿', + '﴿' => '﴾', + '“' => '”', + '”' => '“', + '‘' => '’', + '’' => '‘', + ]; + + // Default list of locales to show in the menu. + const DEFAULT_LOCALES = [ + 'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es', + 'et', 'fi', 'fr', 'he', 'hr', 'hu', 'is', 'it', 'ka', 'lt', 'mr', 'nb', + 'nl', 'nn', 'pl', 'pt', 'ru', 'sk', 'sv', 'tr', 'uk', 'vi', 'zh-Hans', ]; /** @var string Punctuation used to separate list items, typically a comma */ @@ -108,14 +112,10 @@ class I18N { public static function activeLocales() { $code_list = Site::getPreference('LANGUAGES'); - if ($code_list) { - $codes = explode(',', $code_list); + if (empty($code_list)) { + $codes = self::DEFAULT_LOCALES; } else { - $codes = [ - 'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es', - 'et', 'fi', 'fr', 'he', 'hr', 'hu', 'is', 'it', 'ka', 'lt', 'mr', 'nb', - 'nl', 'nn', 'pl', 'pt', 'ru', 'sk', 'sv', 'tr', 'uk', 'vi', 'zh-Hans', - ]; + $codes = explode(',', $code_list); } $locales = []; @@ -435,13 +435,17 @@ class I18N { // Create a translator self::$translator = new Translator($translations, self::$locale->pluralRule()); - // Alphabetic sorting sequence (upper-case letters), used by webtrees to sort strings - list(, self::$alphabet_upper) = explode('=', self::$translator->translate('ALPHABET_upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ')); - // Alphabetic sorting sequence (lower-case letters), used by webtrees to sort strings - list(, self::$alphabet_lower) = explode('=', self::$translator->translate('ALPHABET_lower=abcdefghijklmnopqrstuvwxyz')); - self::$list_separator = /* I18N: This punctuation is used to separate lists of items */ self::translate(', '); + // Create a collator + try { + self::$collator = new Collator(self::$locale->code()); + // Ignore upper/lower case differences + self::$collator->setStrength(Collator::SECONDARY); + } catch (Exception $ex) { + // PHP-INTL is not installed? We'll use a fallback later. + } + return self::$locale->languageTag(); } @@ -561,7 +565,7 @@ class I18N { } // Mirrored characters - $text = strtr($text, self::$mirror_characters); + $text = strtr($text, self::MIRROR_CHARACTERS); $reversed = ''; $digits = ''; @@ -602,9 +606,7 @@ class I18N { } /** - * UTF8 version of PHP::strcasecmp() - * - * Perform a case-insensitive comparison of two strings, using rules from the current locale + * Perform a case-insensitive comparison of two strings. * * @param string $string1 * @param string $string2 @@ -612,98 +614,38 @@ class I18N { * @return int */ public static function strcasecmp($string1, $string2) { - $strpos1 = 0; - $strpos2 = 0; - $strlen1 = strlen($string1); - $strlen2 = strlen($string2); - while ($strpos1 < $strlen1 && $strpos2 < $strlen2) { - $byte1 = ord($string1[$strpos1]); - $byte2 = ord($string2[$strpos2]); - if (($byte1 & 0xE0) === 0xC0) { - $chr1 = $string1[$strpos1++] . $string1[$strpos1++]; - } elseif (($byte1 & 0xF0) === 0xE0) { - $chr1 = $string1[$strpos1++] . $string1[$strpos1++] . $string1[$strpos1++]; - } else { - $chr1 = $string1[$strpos1++]; - } - if (($byte2 & 0xE0) === 0xC0) { - $chr2 = $string2[$strpos2++] . $string2[$strpos2++]; - } elseif (($byte2 & 0xF0) === 0xE0) { - $chr2 = $string2[$strpos2++] . $string2[$strpos2++] . $string2[$strpos2++]; - } else { - $chr2 = $string2[$strpos2++]; - } - if ($chr1 === $chr2) { - continue; - } - // Try the local alphabet first - $offset1 = strpos(self::$alphabet_lower, $chr1); - if ($offset1 === false) { - $offset1 = strpos(self::$alphabet_upper, $chr1); - } - $offset2 = strpos(self::$alphabet_lower, $chr2); - if ($offset2 === false) { - $offset2 = strpos(self::$alphabet_upper, $chr2); - } - if ($offset1 !== false && $offset2 !== false) { - if ($offset1 === $offset2) { - continue; - } else { - return $offset1 - $offset2; - } - } - // Try the global alphabet next - $offset1 = strpos(self::ALPHABET_LOWER, $chr1); - if ($offset1 === false) { - $offset1 = strpos(self::ALPHABET_UPPER, $chr1); - } - $offset2 = strpos(self::ALPHABET_LOWER, $chr2); - if ($offset2 === false) { - $offset2 = strpos(self::ALPHABET_UPPER, $chr2); - } - if ($offset1 !== false && $offset2 !== false) { - if ($offset1 === $offset2) { - continue; - } else { - return $offset1 - $offset2; - } - } - // Just compare by unicode order - return strcmp($chr1, $chr2); + if (self::$collator instanceof Collator) { + return self::$collator->compare($string1, $string2); + } else { + return strcmp(self::strtolower($string1), self::strtolower($string2)); } - // Shortest string comes first. - return ($strlen1 - $strpos1) - ($strlen2 - $strpos2); } /** - * UTF8 version of PHP::strtolower() - * - * Convert a string to lower case, using the rules from the current locale + * Convert a string to lower case. * * @param string $string * * @return string */ public static function strtolower($string) { - if (self::$locale->language()->code() === 'tr' || self::$locale->language()->code() === 'az') { - $string = strtr($string, ['I' => 'ı', 'İ' => 'i']); + if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) { + $string = strtr($string, self::DOTLESS_I_TOLOWER); } return mb_strtolower($string); } /** - * UTF8 version of PHP::strtoupper() - * - * Convert a string to upper case, using the rules from the current locale + * Convert a string to upper case. * * @param string $string * * @return string */ public static function strtoupper($string) { - if (self::$locale->language()->code() === 'tr' || self::$locale->language()->code() === 'az') { - $string = strtr($string, ['ı' => 'I', 'i' => 'İ']); + if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) { + $string = strtr($string, self::DOTLESS_I_TOUPPER); } return mb_strtoupper($string); @@ -762,7 +704,7 @@ class I18N { return 'Latn'; } - foreach (self::$scripts as $range) { + foreach (self::SCRIPT_CHARACTER_RANGES as $range) { if ($code_point >= $range[1] && $code_point <= $range[2]) { return $range[0]; } |
