Context Navigation

← Previous Changeset
Next Changeset →

Changeset 39cc341

Timestamp:

2006-01-03T18:30:54Z (18 years ago)

Author:

Wilmer van der Gaast <wilmer@…>

Branches:

master

Children:

13c4cd3

Parents:

a252c1a

Message:

strip_html now converts non-ASCII characters (entities like &eacute;) to
their UTF-8 versions instead of Latin1. Also added &[aeiou]uml; entities
to the list. However, I still don't know if this is really important anyway...

File:

: 1 edited

util.c (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

util.c

-                      ra252c1a
+                      r39cc341
+{
         char code[8];
         char is;
+        char is[4];
 } htmlentity_t;
 /* FIXME: This is ISO8859-1(5) centric, so might cause problems with other charsets. */
+static htmlentity_t ent[] =
+{
+        { "lt",     '<' },
+        { "gt",     '>' },
+        { "amp",    '&' },
+        { "quot",   '"' },
+        { "aacute", 'á' },
+        { "eacute", 'é' },
+        { "iacute", 'é' },
+        { "oacute", 'ó' },
+        { "uacute", 'ú' },
+        { "agrave", 'à' },
+        { "egrave", 'è' },
+        { "igrave", 'ì' },
+        { "ograve", 'ò' },
+        { "ugrave", 'ù' },
+        { "acirc",  'â' },
+        { "ecirc",  'ê' },
+        { "icirc",  'î' },
+        { "ocirc",  'ô' },
+        { "ucirc",  'û' },
+        { "nbsp",   ' ' },
+        { "",        0  }
+/* Lookup table pairing an HTML entity name (code) with its replacement
+ * text (is). Each replacement is a NUL-terminated string that must fit in
+ * the char is[4] member, and is copied byte by byte into the output when
+ * the entity name matches (case-insensitively). The replacements are meant
+ * to be UTF-8; NOTE(review): that holds only if this source file itself is
+ * saved as UTF-8 - confirm. The last entry has an empty code and
+ * presumably marks the end of the list. */
+static const htmlentity_t ent[] =
+{
+        { "lt",     "<" },
+        { "gt",     ">" },
+        { "amp",    "&" },
+        { "quot",   "\"" },
+        { "aacute", "á" },
+        { "eacute", "é" },
+        { "iacute", "í" },
+        { "oacute", "ó" },
+        { "uacute", "ú" },
+        { "agrave", "à" },
+        { "egrave", "è" },
+        { "igrave", "ì" },
+        { "ograve", "ò" },
+        { "ugrave", "ù" },
+        { "acirc",  "â" },
+        { "ecirc",  "ê" },
+        { "icirc",  "î" },
+        { "ocirc",  "ô" },
+        { "ucirc",  "û" },
+        { "auml",   "ä" },
+        { "euml",   "ë" },
+        { "iuml",   "ï" },
+        { "ouml",   "ö" },
+        { "uuml",   "ü" },
+        { "nbsp",   " " },
+        { "",        ""  }
 };
 …
                                 if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 )
+                                {
+                                        *(s++) = ent[i].is;
+                                        int j;
+                                        for( j = 0; ent[i].is[j]; j ++ )
+                                                *(s++) = ent[i].is[j];
                                         matched = 1;
                                         break;

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 39cc341

Legend:

util.c

Download in other formats: