Author: grothoff Date: 2005-04-03 19:54:54 -0700 (Sun, 03 Apr 2005) New Revision: 584
Modified: i18nHTML/src/commitTranslation.php i18nHTML/src/i18nhtml.inc i18nHTML/src/translate.php i18nHTML/src/vote.php Log: bugfixes Modified: i18nHTML/src/commitTranslation.php =================================================================== --- i18nHTML/src/commitTranslation.php 2005-04-04 02:02:32 UTC (rev 583) +++ i18nHTML/src/commitTranslation.php 2005-04-04 02:54:54 UTC (rev 584) @@ -18,7 +18,7 @@ Boston, MA 02111-1307, USA. */ include("i18nhtml.inc"); -$text = $_REQUEST['text']; +$text = mysql_real_escape_string($_REQUEST['text']); $translation = $_REQUEST['translation']; $back = $_REQUEST['back']; if (!$connection) { @@ -47,7 +47,9 @@ generateFooter(); echo "</body></html>"; } else { - if (array_count_values($text)['%'] != array_count_values($t)['%']) { + $txtCnt = count_chars($text, 1); + $tCnt = count_chars($text, 1); + if ($txtCnt['%'] != $tCnt['%']) { echo "<html><head>"; TITLE("Commit failed."); echo "</head><body>"; @@ -55,7 +57,7 @@ W("The number of percent signs in source text and translation do not match."); W("Note that you must preserve all %%s expressions unchanged."); W("Also, a single displayed %% sign must be translated into two (%%%%) such signs."); - echo "</body></html>" + echo "</body></html>"; } else { $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . $_SERVER['REMOTE_ADDR'] . "\");"; $result = mysql_query($query, $connection); Modified: i18nHTML/src/i18nhtml.inc =================================================================== --- i18nHTML/src/i18nhtml.inc 2005-04-04 02:02:32 UTC (rev 583) +++ i18nHTML/src/i18nhtml.inc 2005-04-04 02:54:54 UTC (rev 584) @@ -305,7 +305,299 @@ return $text; } + /** + * transcode unicode entities to/from HTML entities + * + * Also, this function transforms HTML entities into their equivalent Unicode entities. + * For example, w.bloggar posts pages using HTML entities. + * If you have to modify these pages using web forms, you would like to get UTF-8 instead. 
+ * + * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart + * + * @param string the string to be transcoded + * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML + * @return a transcoded string + */ +function transcode($input, $to_unicode=TRUE) { + + // initialize tables only once + static $html_entities, $unicode_entities; + if(!is_array($html_entities)) { + + + // numerical order + $codes = array( + '&#160;' => '&nbsp;', // non-breaking space + '&#161;' => '&iexcl;', // inverted exclamation mark + '&#162;' => '&cent;', // cent sign + '&#163;' => '&pound;', // pound sign + '&#164;' => '&curren;', // currency sign + '&#165;' => '&yen;', // yen sign + '&#166;' => '&brvbar;', // broken bar + '&#167;' => '&sect;', // section sign + '&#168;' => '&uml;', // diaeresis + '&#169;' => '&copy;', // copyright sign + '&#170;' => '&ordf;', // feminine ordinal indicator + '&#171;' => '&laquo;', // left-pointing double angle quotation mark + '&#172;' => '&not;', // not sign + '&#173;' => '&shy;', // soft hyphen + '&#174;' => '&reg;', // registered sign + '&#175;' => '&macr;', // macron + '&#176;' => '&deg;', // degree sign + '&#177;' => '&plusmn;', // plus-minus sign + '&#178;' => '&sup2;', // superscript two + '&#179;' => '&sup3;', // superscript three + '&#180;' => '&acute;', // acute accent + '&#181;' => '&micro;', // micro sign + '&#182;' => '&para;', // pilcrow sign + '&#183;' => '&middot;', // middle dot + '&#184;' => '&cedil;', // cedilla + '&#185;' => '&sup1;', // superscript one + '&#186;' => '&ordm;', // masculine ordinal indicator + '&#187;' => '&raquo;', // right-pointing double angle quotation mark + '&#188;' => '&frac14;', // vulgar fraction one quarter + '&#189;' => '&frac12;', // vulgar fraction one half + '&#190;' => '&frac34;', // vulgar fraction three quarters + '&#191;' => '&iquest;', // inverted question mark + '&#192;' => '&Agrave;', // latin capital letter A with grave + '&#193;' => '&Aacute;', // latin capital letter A with acute + '&#194;' => '&Acirc;', // latin capital letter A with circumflex + '&#195;' => '&Atilde;', // latin capital letter A with tilde + '&#196;' => '&Auml;', // latin capital letter A with diaeresis + '&#197;' => '&Aring;', // latin capital letter A with ring above + '&#198;' => '&AElig;', // latin capital letter AE + '&#199;' => '&Ccedil;', // latin capital letter C with 
cedilla + '&#200;' => '&Egrave;', // latin capital letter E with grave + '&#201;' => '&Eacute;', // latin capital letter E with acute + '&#202;' => '&Ecirc;', // latin capital letter E with circumflex + '&#203;' => '&Euml;', // latin capital letter E with diaeresis + '&#204;' => '&Igrave;', // latin capital letter I with grave + '&#205;' => '&Iacute;', // latin capital letter I with acute + '&#206;' => '&Icirc;', // latin capital letter I with circumflex + '&#207;' => '&Iuml;', // latin capital letter I with diaeresis + '&#208;' => '&ETH;', // latin capital letter ETH + '&#209;' => '&Ntilde;', // latin capital letter N with tilde + '&#210;' => '&Ograve;', // latin capital letter O with grave + '&#211;' => '&Oacute;', // latin capital letter O with acute + '&#212;' => '&Ocirc;', // latin capital letter O with circumflex + '&#213;' => '&Otilde;', // latin capital letter O with tilde + '&#214;' => '&Ouml;', // latin capital letter O with diaeresis + '&#215;' => '&times;', // multiplication sign + '&#216;' => '&Oslash;', // latin capital letter O with stroke + '&#217;' => '&Ugrave;', // latin capital letter U with grave + '&#218;' => '&Uacute;', // latin capital letter U with acute + '&#219;' => '&Ucirc;', // latin capital letter U with circumflex + '&#220;' => '&Uuml;', // latin capital letter U with diaeresis + '&#221;' => '&Yacute;', // latin capital letter Y with acute + '&#222;' => '&THORN;', // latin capital letter THORN + '&#223;' => '&szlig;', // latin small letter sharp s + '&#224;' => '&agrave;', // latin small letter a with grave + '&#225;' => '&aacute;', // latin small letter a with acute + '&#226;' => '&acirc;', // latin small letter a with circumflex + '&#227;' => '&atilde;', // latin small letter a with tilde + '&#228;' => '&auml;', // latin small letter a with diaeresis + '&#229;' => '&aring;', // latin small letter a with ring above + '&#230;' => '&aelig;', // latin small letter ae + '&#231;' => '&ccedil;', // latin small letter c with cedilla + '&#232;' => '&egrave;', // latin small letter e with grave + '&#233;' => '&eacute;', // latin small letter e with acute + '&#234;' => '&ecirc;', // latin small letter e with circumflex + '&#235;' => '&euml;', // latin small letter e with diaeresis + '&#236;' => '&igrave;', // latin small letter i with grave + '&#237;' => '&iacute;', // latin small letter i with acute + '&#238;' => '&icirc;', // latin small letter i with circumflex + '&#239;' => 
'&iuml;', // latin small letter i with diaeresis + '&#240;' => '&eth;', // latin small letter eth + '&#241;' => '&ntilde;', // latin small letter n with tilde + '&#242;' => '&ograve;', // latin small letter o with grave + '&#243;' => '&oacute;', // latin small letter o with acute + '&#244;' => '&ocirc;', // latin small letter o with circumflex + '&#245;' => '&otilde;', // latin small letter o with tilde + '&#246;' => '&ouml;', // latin small letter o with diaeresis + '&#247;' => '&divide;', // division sign + '&#248;' => '&oslash;', // latin small letter o with stroke + '&#249;' => '&ugrave;', // latin small letter u with grave + '&#250;' => '&uacute;', // latin small letter u with acute + '&#251;' => '&ucirc;', // latin small letter u with circumflex + '&#252;' => '&uuml;', // latin small letter u with diaeresis + '&#253;' => '&yacute;', // latin small letter y with acute + '&#254;' => '&thorn;', // latin small letter thorn + '&#255;' => '&yuml;', // + '&#338;' => '&OElig;', // latin capital ligature OE + '&#339;' => '&oelig;', // latin small ligature oe + '&#352;' => '&Scaron;', // latin capital letter S with caron + '&#353;' => '&scaron;', // latin small letter s with caron + '&#376;' => '&Yuml;', // latin capital letter Y with diaeresis + '&#402;' => '&fnof;' , // latin small f with hook + '&#710;' => '&circ;', // modifier letter circumflex accent + '&#732;' => '&tilde;', // small tilde + '&#913;' => '&Alpha;', // greek capital letter alpha + '&#914;' => '&Beta;', // greek capital letter beta + '&#915;' => '&Gamma;', // greek capital letter gamma + '&#916;' => '&Delta;', // greek capital letter delta + '&#917;' => '&Epsilon;', // greek capital letter epsilon + '&#918;' => '&Zeta;', // greek capital letter zeta + '&#919;' => '&Eta;', // greek capital letter eta + '&#920;' => '&Theta;', // greek capital letter theta + '&#921;' => '&Iota;', // greek capital letter iota + '&#922;' => '&Kappa;', // greek capital letter kappa + '&#923;' => '&Lambda;', // greek capital letter lambda + '&#924;' => '&Mu;', // greek capital letter mu + '&#925;' => '&Nu;', // greek capital letter nu + '&#926;' => '&Xi;', // greek capital letter xi + '&#927;' => '&Omicron;', // greek capital letter omicron + '&#928;' => '&Pi;', // greek capital letter pi + '&#929;' => '&Rho;', // greek capital letter rho + '&#931;' => '&Sigma;', // greek capital letter sigma + '&#932;' => '&Tau;', // greek capital letter tau + '&#933;' => '&Upsilon;', // greek capital letter 
upsilon + '&#934;' => '&Phi;', // greek capital letter phi + '&#935;' => '&Chi;', // greek capital letter chi + '&#936;' => '&Psi;', // greek capital letter psi + '&#937;' => '&Omega;', // greek capital letter omega + '&#945;' => '&alpha;', // greek small letter alpha + '&#946;' => '&beta;', // greek small letter beta + '&#947;' => '&gamma;', // greek small letter gamma + '&#948;' => '&delta;', // greek small letter delta + '&#949;' => '&epsilon;', // greek small letter epsilon + '&#950;' => '&zeta;', // greek small letter zeta + '&#951;' => '&eta;', // greek small letter eta + '&#952;' => '&theta;', // greek small letter theta + '&#953;' => '&iota;', // greek small letter iota + '&#954;' => '&kappa;', // greek small letter kappa + '&#955;' => '&lambda;', // greek small letter lambda + '&#956;' => '&mu;', // greek small letter mu + '&#957;' => '&nu;', // greek small letter nu + '&#958;' => '&xi;', // greek small letter xi + '&#959;' => '&omicron;', // greek small letter omicron + '&#960;' => '&pi;', // greek small letter pi + '&#961;' => '&rho;', // greek small letter rho + '&#962;' => '&sigmaf;', // greek small letter final sigma + '&#963;' => '&sigma;', // greek small letter sigma + '&#964;' => '&tau;', // greek small letter tau + '&#965;' => '&upsilon;', // greek small letter upsilon + '&#966;' => '&phi;', // greek small letter phi + '&#967;' => '&chi;', // greek small letter chi + '&#968;' => '&psi;', // greek small letter psi + '&#969;' => '&omega;', // greek small letter omega + '&#977;' => '&thetasym;', // greek small letter theta symbol + '&#978;' => '&upsih;', // greek upsilon with hook symbol + '&#982;' => '&piv;', // greek pi symbol + '&#8194;' => '&ensp;', // en space + '&#8195;' => '&emsp;', // em space + '&#8201;' => '&thinsp;', // thin space + '&#8204;' => '&zwnj;', // zero width non-joiner + '&#8205;' => '&zwj;', // zero width joiner + '&#8206;' => '&lrm;', // left-to-right mark + '&#8207;' => '&rlm;', // right-to-left mark + '&#8211;' => '&ndash;', // en dash + '&#8212;' => '&mdash;', // em dash + '&#8216;' => '&lsquo;', // left single quotation mark + '&#8217;' => '&rsquo;', // right single quotation mark + '&#8218;' => '&sbquo;', // single low-9 quotation mark + '&#8220;' => '&ldquo;', // left double quotation mark + '&#8221;' => '&rdquo;', // right double quotation mark + '&#8222;' => '&bdquo;', // double low-9 quotation mark + '&#8224;' => '&dagger;', // dagger + '&#8225;' => '&Dagger;', // double dagger + '&#8226;' => '&bull;', // bullet + '&#8230;' => '&hellip;', // horizontal 
ellipsis + '&#8240;' => '&permil;', // per mille sign + '&#8242;' => '&prime;', // primeminutes + '&#8243;' => '&Prime;', // double prime + '&#8249;' => '&lsaquo;', // single left-pointing angle quotation mark + '&#8250;' => '&rsaquo;', // single right-pointing angle quotation mark + '&#8254;' => '&oline;', // overline + '&#8260;' => '&frasl;', // fraction slash + '&#8364;' => '&euro;', // euro sign + '&#8465;' => '&image;', // blackletter capital I + '&#8472;' => '&weierp;', // script capital P + '&#8476;' => '&real;', // blackletter capital R + '&#8482;' => '&trade;', // trade mark sign + '&#8501;' => '&alefsym;', // alef symbol + '&#8592;' => '&larr;', // leftwards arrow + '&#8593;' => '&uarr;', // upwards arrow + '&#8594;' => '&rarr;', // rightwards arrow + '&#8595;' => '&darr;', // downwards arrow + '&#8596;' => '&harr;', // left right arrow + '&#8629;' => '&crarr;', // downwards arrow with corner leftwards + '&#8656;' => '&lArr;', // leftwards double arrow + '&#8657;' => '&uArr;', // upwards double arrow + '&#8658;' => '&rArr;', // rightwards double arrow + '&#8659;' => '&dArr;', // downwards double arrow + '&#8660;' => '&hArr;', // left right double arrow + '&#8704;' => '&forall;', // for all + '&#8706;' => '&part;', // partial differential + '&#8707;' => '&exist;', // there exists + '&#8709;' => '&empty;', // empty set + '&#8711;' => '&nabla;', // nabla + '&#8712;' => '&isin;', // element of + '&#8713;' => '&notin;', // not an element of + '&#8715;' => '&ni;', // contains as member + '&#8719;' => '&prod;', // n-ary product + '&#8721;' => '&sum;', // n-ary sumation + '&#8722;' => '&minus;', // minus sign + '&#8727;' => '&lowast;', // asterisk operator + '&#8730;' => '&radic;', // square root + '&#8733;' => '&prop;', // proportional to + '&#8734;' => '&infin;', // infinity + '&#8736;' => '&ang;', // angle + '&#8743;' => '&and;', // logical and + '&#8744;' => '&or;', // logical or + '&#8745;' => '&cap;', // intersection + '&#8746;' => '&cup;', // union + '&#8747;' => '&int;', // integral + '&#8756;' => '&there4;', // therefore + '&#8764;' => '&sim;', // tilde operator + '&#8773;' => '&cong;', // approximately equal to + '&#8776;' => '&asymp;', // almost equal to + '&#8800;' => '&ne;', // not equal to + '&#8801;' => '&equiv;', // identical to + '&#8804;' => '&le;', // less-than or equal to + '&#8805;' => '&ge;', // greater-than or equal to + '&#8834;' => '&sub;', // subset of + '&#8835;' => '&sup;', // superset of + '&#8836;' => '&nsub;', // not a subset of + '&#8838;' => '&sube;', // subset of or equal to + '&#8839;' => '&supe;', // superset of or equal to + '&#8853;' => '&oplus;', // circled plus + '&#8855;' => '&otimes;', 
// circled times + '&#8869;' => '&perp;', // up tack + '&#8901;' => '&sdot;', // dot operator + '&#8968;' => '&lceil;', // left ceiling + '&#8969;' => '&rceil;', // right ceiling + '&#8970;' => '&lfloor;', // left floor + '&#8971;' => '&rfloor;', // right floor + '&#9001;' => '&lang;', // left-pointing angle bracket + '&#9002;' => '&rang;', // right-pointing angle bracket + '&#9674;' => '&loz;', // lozenge + '&#9824;' => '&spades;', // black spade suit + '&#9827;' => '&clubs;', // black club suit + '&#9829;' => '&hearts;', // black heart suit + '&#9830;' => '&diams;' // black diam suit + ); + + // split entities for use in str_replace() + foreach($codes as $unicode_entity => $html_entity) { + $unicode_entities[] = $unicode_entity; + $html_entities[] = $html_entity; + } + } + + // transcode HTML entities to Unicode + if($to_unicode) + return str_replace($html_entities, $unicode_entities, $input); + + // transcode Unicode entities to HTML entities + else + return str_replace($unicode_entities, $html_entities, $input); +} + + + + +/** * transcode multi-byte characters to HTML representations for Unicode * * This function is aiming to preserve Unicode characters through storage in a ISO-8859-1 compliant system. @@ -329,7 +621,7 @@ function to_unicode($input) { // transcode HTML entities to Unicode entities - $input = utf8::transcode($input); + $input = transcode($input); // scan the whole string $output = ''; @@ -533,7 +825,7 @@ global $lang; global $languagecodes; echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >"; - echo "<title>" . W_($a,$b) . "</title>\n"; + echo "<title>" . TRANSLATE_($a,$b) . "</title>\n"; if (isset($languagecodes[$lang])) { echo "<meta name=\"content-language\" content=\"" . $languagecodes[$lang] . "\">"; Modified: i18nHTML/src/translate.php =================================================================== --- i18nHTML/src/translate.php 2005-04-04 02:02:32 UTC (rev 583) +++ i18nHTML/src/translate.php 2005-04-04 02:54:54 UTC (rev 584) @@ -43,7 +43,7 @@ W("Destination language: "); W($lang); P(); -echo "<form method="POST" action=\"" . $i18nHTMLbase . 
"commitTranslation.php\">\n"; +echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n"; echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n"; echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n"; echo "<input type=hidden name=\"back\" value=\"$back\">\n"; Modified: i18nHTML/src/vote.php =================================================================== --- i18nHTML/src/vote.php 2005-04-04 02:02:32 UTC (rev 583) +++ i18nHTML/src/vote.php 2005-04-04 02:54:54 UTC (rev 584) @@ -31,8 +31,8 @@ } $text = $_REQUEST['text']; $translation = $_REQUEST['translation']; -$u = urlencode($text); -$t = urlencode($translation); +$u = mysql_real_escape_string($text); +$t = mysql_real_escape_string(to_unicode($translation)); echo "text = " . $text . "<br>\n"; echo "translation = " . $translation . "<br>\n"; _______________________________________________ GNUnet-SVN mailing list GNUnet-SVN@gnu.org http://lists.gnu.org/mailman/listinfo/gnunet-svn